In [1]:
from datetime import datetime
import logging
import os
import sys
import numba as nb
from pC2DMS import *

import matplotlib.pyplot as plt

In [None]:

start = datetime.now()
logging.basicConfig(filename='report_run_time.log', filemode='w', format='%(asctime)s - %(message)s', level=logging.INFO)

# file_name  peptide_sequence
# ME14_3+:   VTIMPK(Ac)DIQLAR
# PH4_3+     EQFDDY(p)GHMRF(NH2) 
# ME9_3+:    GWGR(Me2)EENLFSWK
# ME16_3+:   VTIMPKDIQLAR
# 7255:      ALDLLDRNYLQSLPSK
# 7732:      KFIFRTAGTAGR
# 6727:      DQARVAPSSSDPKSKFF
# 7302:      HGMTVVIRKKF
# 3510:      GSHQISLDNPDYQQDFFPK
loss = 0
for dirname in os.listdir('./peptide output/'):
    for filename in os.listdir('./peptide output/' + str(dirname)):
        if filename == '.ipynb_checkpoints':
            continue
        if filename == '20150626_1623_PH15_2+_CID_NCE_10_CVscan_scanTime60mins':
            continue
        sub_start = datetime.now()
        numscan_list = []
        for dir_name in os.listdir('./peptide output/' + str(dirname) + '/' + str(filename) + '/'):
            if len(dir_name.split(' ')) == 2:
                numscan_list.append(int(dir_name.split(' ')[0]))
        numscan_list.sort()
        if os.path.exists('./peptide output/' + str(dirname) + '/' + str(filename) + '/' + str(numscan_list[-1])+' scans/'+ filename +'_histogram.jpg'):
            continue
        sys.argv = [sys.argv[0], '--parpath', './peptide output/' + str(dirname) + '/' + str(filename) + '/', '--name', str(filename), '--numscan_list', numscan_list]
        %run report.ipynb 
        print('\n')
        sub_stop = datetime.now()
        logging.info(f"The running time of {filename}: {sub_stop - sub_start}")
        # scan_number = numscan_list[-1]
        # loss += 1 - np.load('./peptide output/' + str(dirname) + '/' + str(filename) + '/' + str(scan_number)+' scans/'+ name +'.npy', allow_pickle=True)[0]


In [2]:

def calculate_speed_of_convergence(x: np.ndarray) -> float:
    """
    Computes the speed of convergence as the mean difference between
    consecutive values of ``x``.

    Args:
        x: The array of values to compute the speed of convergence for (must be at least 2 elements long)

    Returns:
        The (uniformly weighted) average of ``np.diff(x)`` as a ``np.float32``

    Raises:
        ValueError: If ``x`` has fewer than 2 elements, so that no difference
            can be formed. (Previously ``None`` was returned in this case, which
            broke callers that use the result arithmetically.)
    """
    rate_of_change = np.diff(x)
    if len(rate_of_change) == 0:
        raise ValueError(
            f"calculate_speed_of_convergence needs at least 2 values, got {np.size(x)}"
        )

    # Uniform weights, i.e. a plain mean of the differences. A linearly decaying
    # alternative would be np.arange(n, 0, -1) / n with n = len(rate_of_change).
    weights = np.ones(len(rate_of_change))
    speed_of_convergence = np.average(rate_of_change, weights=weights).astype(np.float32)
    return speed_of_convergence

In [3]:

from sklearn.metrics import average_precision_score

def objective_function(df, k):
    """
    Score a table of fragment pairs; callers negate the result to use it as a loss.

    The score combines
      * the average precision of the ``k`` highest-scoring rows (weight 0.99), and
      * a cost-sensitive risk over all rows (weight 0.01), in which a row counts
        as a predicted positive when its normalised score is >= 0.

    Args:
        df: DataFrame with the columns 'interpretation 1' (an empty string marks
            a row without interpretation) and 'normalised score' (convertible to
            float).
        k: Number of highest-scoring rows used for the top-k average precision.

    Returns:
        ``0.99 * top_k_average_precision - 0.01 * cost_sensitive_conditional_risk``.
    """
    # Plain NumPy arrays so that the positional indices from argsort below are
    # never interpreted as index labels.
    true_labels = df['interpretation 1'].ne('').to_numpy()
    predictions = df['normalised score'].astype('float').to_numpy()

    # Calculate cost-sensitive conditional risk
    false_positive_cost = 10.0
    false_negative_cost = 0.01

    false_positive = np.logical_and(~true_labels, predictions >= 0)
    false_negative = np.logical_and(true_labels, predictions < 0)

    cost_sensitive_conditional_risk = (np.sum(false_positive) * false_positive_cost +
                                       np.sum(false_negative) * false_negative_cost) / len(df)

    # Min-max scale the scores; when all scores are equal the range is zero and
    # the division would produce NaNs, so only shift in that case.
    lowest = np.nanmin(predictions)
    score_range = np.nanmax(predictions) - lowest
    if score_range == 0:
        predictions = predictions - lowest
    else:
        predictions = (predictions - lowest) / score_range

    # Calculate top-k average precision. The scores of the selected rows are
    # passed as y_score (the row indices were passed here before, which ranked
    # the rows by their position instead of by their score).
    top_k_indices = np.argsort(predictions)[-k:]
    top_k_true_labels = true_labels[top_k_indices]
    top_k_scores = predictions[top_k_indices]
    top_k_average_precision = average_precision_score(top_k_true_labels, top_k_scores)

    print('top_k_average_precision: ', top_k_average_precision)
    print('cost_sensitive_conditional_risk: ', cost_sensitive_conditional_risk)
    objective_value = (0.99 * top_k_average_precision) - (0.01 * cost_sensitive_conditional_risk)

    return objective_value


In [4]:
import timeit
import functools
import pC2DMS
import pC2DMSUtils
import numpy as np
import pandas as pd
import numba as nb
import torch
import itertools
import gc
from scipy import integrate

# NOTE(review): logging.basicConfig() does nothing when the root logger already has
# handlers (for example if the earlier basicConfig call in this notebook, or an imported
# library, configured logging first), so 'optimize_weights.log' and this format may
# not take effect - confirm.
logging.basicConfig(filename='optimize_weights.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def calculate_whole_score_list(numscan_list, top_score, new_top_feat_lows, tolerance):
    """
    Collect, for every entry of ``top_score``, the scores of the matching
    features found at each scan count.

    Args:
        numscan_list: Scan counts; the feature array for a scan count ``n`` is
            read from ``new_top_feat_lows[n // 1000 - 1]``.
        top_score: Rows whose element 0 is copied into the output and whose
            elements 2 and 3 are compared with columns 0 and 1 of the feature arrays.
        new_top_feat_lows: Sequence of 2-D arrays; column 3 is copied into the output.
        tolerance: Maximum absolute difference allowed on both compared columns.

    Returns:
        A list with one array per entry of ``top_score``. For every scan count
        it holds a ``[value, scan_count, 0]`` row followed by one
        ``[value, scan_count, column 3]`` row per matching feature.
    """
    def rows_for(reference, scan_count):
        # Feature array that belongs to this scan count.
        features = new_top_feat_lows[scan_count // 1000 - 1]
        first_close = abs(torch.tensor(float(reference[2]) - features[:, 0].astype(float))) <= tolerance
        second_close = abs(torch.tensor(float(reference[3]) - features[:, 1].astype(float))) <= tolerance
        hits = first_close & second_close
        rows = [[float(reference[0]), int(scan_count), 0]]
        for feature in features[hits]:
            rows.append([float(reference[0]), int(scan_count), float(feature[3])])
        return rows

    whole_score_list = []
    for reference in top_score:
        collected = []
        for scan_count in numscan_list:
            collected += rows_for(reference, scan_count)
        whole_score_list.append(np.array(collected))
    return whole_score_list

def loss_func1(scan_dir, weights, indexList, numScans='all'):
    """
    Loss: number of uninterpreted fragment pairs among the top-ranked features.

    Steps:
      1. Build a ``pC2DMS.PCovMap`` for ``scan_dir`` with ``scan1.weightFunc(weights)``
         and sample the features at ``indexList``.
      2. Drop pairs whose two m/z values differ by less than 5.5 (diagonal cut).
      3. Match every remaining pair against the rows of ``<scan_dir>/sequence.npy``
         (both m/z values within 0.8, in either order); pairs without a match get
         empty interpretation strings.
      4. Rank by normalised score (then plausibility, then mass deviation) and keep
         one row per (m/z 1, m/z 2) pair.
      5. Count the uninterpreted rows among the first 30 ranked rows (fewer if the
         table is shorter), in complete groups of ten.

    Args:
        scan_dir: Directory of the scan; must contain 'sequence.npy'.
        weights: Passed to ``scan1.weightFunc``.
        indexList: Passed to ``cmap.sampleFeats``.
        numScans: Passed to ``pC2DMS.PCovMap`` (default 'all').

    Returns:
        The total number of uninterpreted rows over the complete groups of ten.
    """
    print(scan_dir)
    scan1 = pC2DMS.Scan(scan_dir)
    cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans=numScans)
    topfeat = cmap.sampleFeats(indexList)
    tolerance = 0.8
    # Cut off diagonal with 5.5 Da
    new_top_feat = []
    for i in topfeat:
        if abs(torch.tensor(i[0] - i[1])) >= 5.5:
            new_top_feat.append(i)
    new_top_feat = np.array(new_top_feat, dtype=object)
    # NOTE(review): allow_pickle=True unpickles the file; only load sequence.npy
    # from a trusted source.
    sequence = np.load(os.path.join(scan_dir, 'sequence.npy'), allow_pickle=True)
    correlation = []
    # compare the measured fragments pairs to theoretical values list
    # if the difference is smaller than or equal to the tolerance,
    # the array created will record this pair
    for i in new_top_feat:
        for j in sequence:
            if abs(torch.tensor(i[0]-j[2])) <= tolerance and abs(torch.tensor(i[1]-j[1])) <= tolerance:
                mass_deviation = np.sqrt((i[0]-j[2])**2+(i[1]-j[1])**2)
                correlation.append([str(j[3]), i[0], i[1], str(j[0]), i[2], i[3], j[4], mass_deviation.round(3)])
            elif abs(torch.tensor(i[0]-j[1])) <= tolerance and abs(torch.tensor(i[1]-j[2])) <= tolerance:
                mass_deviation = np.sqrt((i[0]-j[1])**2+(i[1]-j[2])**2)
                correlation.append([str(j[0]), i[0], i[1], str(j[3]), i[2], i[3], j[4], mass_deviation.round(3)])
    correlation = np.array(correlation)
    # Without any match np.array([]) is 1-D and the column slice would raise
    # IndexError, so fall back to an empty list of matched pairs.
    if correlation.size:
        correlation_list = correlation[:,1:3].astype(float).tolist()
    else:
        correlation_list = []
    # if there is no match with the theoretical values, will record the pair as unidentified fragments
    unidentify = []
    for item in new_top_feat.tolist():
        if item[:2] not in correlation_list:
            unidentify.append(['',item[0],item[1],'',item[2],item[3],'',''])
    correlation_new = correlation.tolist()+unidentify
    correlation_new = np.array(correlation_new)
    # sort the fragments according to their normalised correlation scores
    mapp = np.array([float(x) for x in correlation_new[:, 5]])
    correlation_new = correlation_new[np.flip(mapp.argsort())]

    df = pd.DataFrame(correlation_new, columns=['Interpretation A','m/z A', 'm/z B', 'Interpretation B',
                     'CorrelationScore', 'NormalisedScore', 'Plausibility','MassDeviation'])
    df.NormalisedScore = df.NormalisedScore.astype(float)
    # sort the fragments according to normalisation scores and then to their interpretation plausibility and then to
    # their mass deviation from theoretical values
    df2 = df.sort_values(by=['NormalisedScore','Plausibility','MassDeviation'], ascending = [False, True, True])
    correlation_new=df2.to_numpy()
    # add index number to fragments
    # different index numbers refer to different pairs; further interpretations
    # of the same pair get an empty index
    plus_index =[]
    number=1
    for row in range(len(correlation_new)):
        if row < 1:
            plus_index.append(np.insert(correlation_new[row],0,number))
        elif correlation_new[row][1]==correlation_new[row-1][1] and correlation_new[row][2]==correlation_new[row-1][2]:
            plus_index.append(np.insert(correlation_new[row],0,''))
        else:
            number = number+1
            plus_index.append(np.insert(correlation_new[row],0,number))
    df=pd.DataFrame(plus_index, columns=['Index', 'interpretation 1','m/z 1','m/z 2','interpretation 2','score',
                                         'normalised score','Plausibility','MassDeviation'])
    # drop repeated rows according to columns m/z 1 & m/z 2 (delete those with more than 1 interpretation)
    df=df.drop_duplicates(subset=['m/z 1', 'm/z 2'], keep="first").reset_index(drop=True)
    df['m/z 1']=df['m/z 1'].astype(float)
    df['m/z 2']=df['m/z 2'].astype(float)
    df['normalised score']=df['normalised score'].astype(float)
    ## if there is no interpretation, the array holds '1',
    ## if there is an interpretation, the array holds '0'.
    ## Only the rows that exist are inspected: a table shorter than 30 rows
    ## would otherwise make df.iloc raise IndexError.
    inspected_rows = min(30, len(df))
    count = [1 if df.iloc[i]['interpretation 1'] == '' else 0 for i in range(inspected_rows)]
    newlist=np.array(count)
    y_list = []
    x_list = []
    x_value = 0
    while len(newlist) >= 10:
        # count the sum of array for the first 10 peaks
        # the sum is equivalent to the number of unidentified peaks
        first_ten = newlist[:10]
        y_list.append(np.sum(first_ten))
        # continue with the next 10 peaks
        newlist = newlist[10:]
        # x-axis value of this group (upper rank of the group)
        x_value = x_value+10
        x_list.append(x_value)

    print('Number of unidentified peaks: ', y_list)
    print('Weights: ', weights)

    # (Persisting `weights` to <scan_dir>/weights.npy was disabled in the original code.)

    return np.sum(y_list)
    

2023-08-16 10:17:46,881 - numexpr.utils - INFO - Note: detected 256 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
2023-08-16 10:17:46,883 - numexpr.utils - INFO - Note: NumExpr detected 256 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.


In [5]:

def loss_func2(scan_dir, weights, indexList, numScans='all'):
    """
    Loss: negative rank of the first uninterpreted fragment pair.

    The features sampled from the ``pC2DMS.PCovMap`` of ``scan_dir`` are cut at the
    diagonal (m/z difference < 5.5 removed), matched against the rows of
    ``<scan_dir>/sequence.npy`` (both m/z values within 0.8, in either order),
    ranked by normalised score (then plausibility, then mass deviation) and
    reduced to one row per (m/z 1, m/z 2) pair.

    Args:
        scan_dir: Directory of the scan; must contain 'sequence.npy'.
        weights: Passed to ``scan1.weightFunc``.
        indexList: Passed to ``cmap.sampleFeats``.
        numScans: Passed to ``pC2DMS.PCovMap`` (default 'all').

    Returns:
        The negated 1-based rank of the first row without interpretation. When
        every row is interpreted the rank is taken as ``len(table) + 1``.
    """
    print(scan_dir)
    scan1 = pC2DMS.Scan(scan_dir)
    cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans=numScans)
    topfeat = cmap.sampleFeats(indexList)
    tolerance = 0.8
    # Cut off diagonal with 5.5 Da
    new_top_feat = []
    for i in topfeat:
        if abs(torch.tensor(i[0] - i[1])) >= 5.5:
            new_top_feat.append(i)
    new_top_feat = np.array(new_top_feat, dtype=object)
    # NOTE(review): allow_pickle=True unpickles the file; only load sequence.npy
    # from a trusted source.
    sequence = np.load(os.path.join(scan_dir, 'sequence.npy'), allow_pickle=True)
    correlation = []
    # compare the measured fragments pairs to theoretical values list
    # if the difference is smaller than or equal to the tolerance,
    # the array created will record this pair
    for i in new_top_feat:
        for j in sequence:
            if abs(torch.tensor(i[0]-j[2])) <= tolerance and abs(torch.tensor(i[1]-j[1])) <= tolerance:
                mass_deviation = np.sqrt((i[0]-j[2])**2+(i[1]-j[1])**2)
                correlation.append([str(j[3]), i[0], i[1], str(j[0]), i[2], i[3], j[4], mass_deviation.round(3)])
            elif abs(torch.tensor(i[0]-j[1])) <= tolerance and abs(torch.tensor(i[1]-j[2])) <= tolerance:
                mass_deviation = np.sqrt((i[0]-j[1])**2+(i[1]-j[2])**2)
                correlation.append([str(j[0]), i[0], i[1], str(j[3]), i[2], i[3], j[4], mass_deviation.round(3)])
    correlation = np.array(correlation)
    # Without any match np.array([]) is 1-D and the column slice would raise
    # IndexError, so fall back to an empty list of matched pairs.
    if correlation.size:
        correlation_list = correlation[:,1:3].astype(float).tolist()
    else:
        correlation_list = []
    # if there is no match with the theoretical values, will record the pair as unidentified fragments
    unidentify = []
    for item in new_top_feat.tolist():
        if item[:2] not in correlation_list:
            unidentify.append(['',item[0],item[1],'',item[2],item[3],'',''])
    correlation_new = correlation.tolist()+unidentify
    correlation_new = np.array(correlation_new)
    # sort the fragments according to their normalised correlation scores
    mapp = np.array([float(x) for x in correlation_new[:, 5]])
    correlation_new = correlation_new[np.flip(mapp.argsort())]

    df = pd.DataFrame(correlation_new, columns=['Interpretation A','m/z A', 'm/z B', 'Interpretation B',
                     'CorrelationScore', 'NormalisedScore', 'Plausibility','MassDeviation'])
    df.NormalisedScore = df.NormalisedScore.astype(float)
    # sort the fragments according to normalisation scores and then to their interpretation plausibility and then to
    # their mass deviation from theoretical values
    df2 = df.sort_values(by=['NormalisedScore','Plausibility','MassDeviation'], ascending = [False, True, True])
    correlation_new=df2.to_numpy()
    # add index number to fragments
    # different index numbers refer to different pairs; further interpretations
    # of the same pair get an empty index
    plus_index =[]
    number=1
    for row in range(len(correlation_new)):
        if row < 1:
            plus_index.append(np.insert(correlation_new[row],0,number))
        elif correlation_new[row][1]==correlation_new[row-1][1] and correlation_new[row][2]==correlation_new[row-1][2]:
            plus_index.append(np.insert(correlation_new[row],0,''))
        else:
            number = number+1
            plus_index.append(np.insert(correlation_new[row],0,number))
    df=pd.DataFrame(plus_index, columns=['Index', 'interpretation 1','m/z 1','m/z 2','interpretation 2','score',
                                         'normalised score','Plausibility','MassDeviation'])
    # drop repeated rows according to columns m/z 1 & m/z 2 (delete those with more than 1 interpretation)
    df=df.drop_duplicates(subset=['m/z 1', 'm/z 2'], keep="first").reset_index(drop=True)
    df['m/z 1']=df['m/z 1'].astype(float)
    df['m/z 2']=df['m/z 2'].astype(float)
    # 1-based rank of the first row without interpretation (the index was reset above)
    unidentified_index = next((j+1 for j, row in df.iterrows() if row['interpretation 1'] == ''), None)
    if unidentified_index is None:
        # Every row is interpreted: place the first uninterpreted rank just past
        # the end of the table instead of failing on `-None`.
        unidentified_index = len(df) + 1
    print('unidentified_index: ', unidentified_index)
    print('weights: ', weights)
    return -unidentified_index

In [6]:

def loss_func3(scan_dir, weights, numScans='all', top_n=600, max_number=1844.9996199999998):
    """
    Loss: negative rank of the first feature pair whose summed m/z exceeds ``max_number``.

    The ``top_n`` features returned by ``cmap.analyse`` are cut at the diagonal
    (m/z difference < 5.5 removed) and ranked by normalised score, highest first.
    The weights are written to ``<scan_dir>/weights.npy`` on every call.

    Args:
        scan_dir: Directory of the scan.
        weights: Passed to ``scan1.weightFunc`` and saved to 'weights.npy'.
        numScans: Passed to ``pC2DMS.PCovMap`` (default 'all').
        top_n: Passed to ``cmap.analyse`` (default 600).
        max_number: Upper limit for ``m/z A + m/z B``; the default is the value
            that was hard-coded before (per the original comment, the peptide mass).

    Returns:
        The negated 1-based rank of the first pair above the limit. When no pair
        exceeds the limit the rank is taken as ``number of pairs + 1``.
    """
    scan1 = pC2DMS.Scan(scan_dir)
    cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans=numScans)
    topfeat = cmap.analyse(top_n)
    # Cut off diagonal with 5.5 Da
    new_top_feat=[]
    for i in topfeat:
        if abs(torch.tensor(i[0]-i[1])) >= 5.5:
            new_top_feat.append(i)
    # reshape keeps an empty selection two-dimensional so the frame can still be built
    new_top_feat = np.array(new_top_feat).reshape(-1, 4)
    df = pd.DataFrame(new_top_feat, columns=['m/z A', 'm/z B', 'CorrelationScore', 'NormalisedScore'])
    df.NormalisedScore = df.NormalisedScore.astype(float)
    # Rank by normalised score. The ranked frame is the one that is scanned below;
    # the unsorted frame was scanned before although the sorted one had been built.
    ranked = df.sort_values(by=['NormalisedScore'], ascending=False).reset_index(drop=True)
    # Find the position where the fragments mass exceeds that of peptide
    pair_mass = ranked['m/z A'].astype(float) + ranked['m/z B'].astype(float)
    exceeds_limit = (pair_mass > max_number).to_numpy()
    if exceeds_limit.any():
        outlier_index = int(np.argmax(exceeds_limit)) + 1
    else:
        # No outlier at all: previously `outlier_index` stayed unbound here.
        outlier_index = len(ranked) + 1
    print('outlier_index', outlier_index)
    print('weights', weights)

    # Save weights (np.save overwrites an existing file)
    np.save(os.path.join(scan_dir, 'weights.npy'), weights)

    return -outlier_index

In [7]:

def loss_func4(scan_dir, weights, indexList, numScans='all'):
    """
    Loss: negated ``objective_function`` score of the ranked fragment-pair table.

    The features sampled from the ``pC2DMS.PCovMap`` of ``scan_dir`` are cut at the
    diagonal (m/z difference < 5.5 removed), matched against the rows of
    ``<scan_dir>/sequence.npy`` (both m/z values within 0.8, in either order),
    ranked by normalised score (then plausibility, then mass deviation) and
    reduced to one row per (m/z 1, m/z 2) pair. The resulting table is scored
    with ``objective_function(df, 30)``.

    Args:
        scan_dir: Directory of the scan; must contain 'sequence.npy'.
        weights: Passed to ``scan1.weightFunc``.
        indexList: Passed to ``cmap.sampleFeats``.
        numScans: Passed to ``pC2DMS.PCovMap`` (default 'all').

    Returns:
        ``-objective_function(df, 30)``.
    """
    print(scan_dir)
    scan1 = pC2DMS.Scan(scan_dir)
    cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans=numScans)
    topfeat = cmap.sampleFeats(indexList)
    tolerance = 0.8
    # Cut off diagonal with 5.5 Da
    new_top_feat = []
    for i in topfeat:
        if abs(torch.tensor(i[0] - i[1])) >= 5.5:
            new_top_feat.append(i)
    new_top_feat = np.array(new_top_feat, dtype=object)
    # NOTE(review): allow_pickle=True unpickles the file; only load sequence.npy
    # from a trusted source.
    sequence = np.load(os.path.join(scan_dir, 'sequence.npy'), allow_pickle=True)
    correlation = []
    # compare the measured fragments pairs to theoretical values list
    # if the difference is smaller than or equal to the tolerance,
    # the array created will record this pair
    for i in new_top_feat:
        for j in sequence:
            if abs(torch.tensor(i[0]-j[2])) <= tolerance and abs(torch.tensor(i[1]-j[1])) <= tolerance:
                mass_deviation = np.sqrt((i[0]-j[2])**2+(i[1]-j[1])**2)
                correlation.append([str(j[3]), i[0], i[1], str(j[0]), i[2], i[3], j[4], mass_deviation.round(3)])
            elif abs(torch.tensor(i[0]-j[1])) <= tolerance and abs(torch.tensor(i[1]-j[2])) <= tolerance:
                mass_deviation = np.sqrt((i[0]-j[1])**2+(i[1]-j[2])**2)
                correlation.append([str(j[0]), i[0], i[1], str(j[3]), i[2], i[3], j[4], mass_deviation.round(3)])
    correlation = np.array(correlation)
    # Without any match np.array([]) is 1-D and the column slice would raise
    # IndexError, so fall back to an empty list of matched pairs.
    if correlation.size:
        correlation_list = correlation[:,1:3].astype(float).tolist()
    else:
        correlation_list = []
    # if there is no match with the theoretical values, will record the pair as unidentified fragments
    unidentify = []
    for item in new_top_feat.tolist():
        if item[:2] not in correlation_list:
            unidentify.append(['',item[0],item[1],'',item[2],item[3],'',''])
    correlation_new = correlation.tolist()+unidentify
    correlation_new = np.array(correlation_new)
    # sort the fragments according to their normalised correlation scores
    mapp = np.array([float(x) for x in correlation_new[:, 5]])
    correlation_new = correlation_new[np.flip(mapp.argsort())]

    df = pd.DataFrame(correlation_new, columns=['Interpretation A','m/z A', 'm/z B', 'Interpretation B',
                     'CorrelationScore', 'NormalisedScore', 'Plausibility','MassDeviation'])
    df.NormalisedScore = df.NormalisedScore.astype(float)
    # sort the fragments according to normalisation scores and then to their interpretation plausibility and then to
    # their mass deviation from theoretical values
    df2 = df.sort_values(by=['NormalisedScore','Plausibility','MassDeviation'], ascending = [False, True, True])
    correlation_new=df2.to_numpy()
    # add index number to fragments
    # different index numbers refer to different pairs; further interpretations
    # of the same pair get an empty index
    plus_index =[]
    number=1
    for row in range(len(correlation_new)):
        if row < 1:
            plus_index.append(np.insert(correlation_new[row],0,number))
        elif correlation_new[row][1]==correlation_new[row-1][1] and correlation_new[row][2]==correlation_new[row-1][2]:
            plus_index.append(np.insert(correlation_new[row],0,''))
        else:
            number = number+1
            plus_index.append(np.insert(correlation_new[row],0,number))
    df=pd.DataFrame(plus_index, columns=['Index', 'interpretation 1','m/z 1','m/z 2','interpretation 2','score',
                                         'normalised score','Plausibility','MassDeviation'])
    # drop repeated rows according to columns m/z 1 & m/z 2 (delete those with more than 1 interpretation)
    df=df.drop_duplicates(subset=['m/z 1', 'm/z 2'], keep="first").reset_index(drop=True)
    df['m/z 1']=df['m/z 1'].astype(float)
    df['m/z 2']=df['m/z 2'].astype(float)
    score = objective_function(df, 30)

    # (Persisting `weights` to <scan_dir>/weights_de.npy was disabled in the original code.)

    print("weights: ", weights)
    print("Score: ", score)
    print(df[:50])

    return -score

In [8]:

def loss_func5(scan_dir, weights, indexList, numScans='all'):
    """
    Loss: negative number of top features that are also found in the '1000 scans' map.

    First the rank of the first uninterpreted pair is determined: the features
    sampled from the ``pC2DMS.PCovMap`` of ``scan_dir`` are cut at the diagonal
    (m/z difference < 5.5 removed), matched against ``<scan_dir>/sequence.npy``
    (both m/z values within 0.8, in either order), ranked by normalised score and
    reduced to one row per pair. That rank, rounded down to a multiple of ten,
    is ``top_number``. Then the first ``top_number + 1`` sampled features are
    compared with the first ``top_number + 1`` ranked features of the map built
    from the sibling directory '1000 scans/'; every agreement within 0.8 on both
    m/z values (in direct or swapped order) is counted.

    Args:
        scan_dir: Directory of the scan; must contain 'sequence.npy' and have a
            sibling directory '1000 scans/'.
        weights: Passed to the ``weightFunc`` of both scans.
        indexList: Passed to ``sampleFeats`` of both maps.
        numScans: Passed to ``pC2DMS.PCovMap`` for ``scan_dir`` (default 'all').

    Returns:
        The negated number of agreements.
    """
    print(scan_dir)
    scan1 = pC2DMS.Scan(scan_dir)
    cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans=numScans)
    topfeat = cmap.sampleFeats(indexList)
    tolerance = 0.8
    tolerance_float = torch.tensor(tolerance, dtype=torch.float)
    # Cut off diagonal with 5.5 Da
    new_top_feat = []
    for i in topfeat:
        if abs(torch.tensor(i[0] - i[1])) >= 5.5:
            new_top_feat.append(i)
    new_top_feat = np.array(new_top_feat, dtype=object)
    # NOTE(review): allow_pickle=True unpickles the file; only load sequence.npy
    # from a trusted source.
    sequence = np.load(os.path.join(scan_dir, 'sequence.npy'), allow_pickle=True)
    correlation = []
    # compare the measured fragments pairs to theoretical values list
    # if the difference is smaller than or equal to the tolerance,
    # the array created will record this pair
    for i in new_top_feat:
        for j in sequence:
            if abs(torch.tensor(i[0]-j[2])) <= tolerance and abs(torch.tensor(i[1]-j[1])) <= tolerance:
                mass_deviation = np.sqrt((i[0]-j[2])**2+(i[1]-j[1])**2)
                correlation.append([str(j[3]), i[0], i[1], str(j[0]), i[2], i[3], j[4], mass_deviation.round(3)])
            elif abs(torch.tensor(i[0]-j[1])) <= tolerance and abs(torch.tensor(i[1]-j[2])) <= tolerance:
                mass_deviation = np.sqrt((i[0]-j[1])**2+(i[1]-j[2])**2)
                correlation.append([str(j[0]), i[0], i[1], str(j[3]), i[2], i[3], j[4], mass_deviation.round(3)])
    correlation = np.array(correlation)
    # Without any match np.array([]) is 1-D and the column slice would raise
    # IndexError, so fall back to an empty list of matched pairs.
    if correlation.size:
        correlation_list = correlation[:,1:3].astype(float).tolist()
    else:
        correlation_list = []
    # if there is no match with the theoretical values, will record the pair as unidentified fragments
    unidentify = []
    for item in new_top_feat.tolist():
        if item[:2] not in correlation_list:
            unidentify.append(['',item[0],item[1],'',item[2],item[3],'',''])
    correlation_new = correlation.tolist()+unidentify
    correlation_new = np.array(correlation_new)
    # sort the fragments according to their normalised correlation scores
    mapp = np.array([float(x) for x in correlation_new[:, 5]])
    correlation_new = correlation_new[np.flip(mapp.argsort())]

    df = pd.DataFrame(correlation_new, columns=['Interpretation A','m/z A', 'm/z B', 'Interpretation B',
                     'CorrelationScore', 'NormalisedScore', 'Plausibility','MassDeviation'])
    df.NormalisedScore = df.NormalisedScore.astype(float)
    # sort the fragments according to normalisation scores and then to their interpretation plausibility and then to
    # their mass deviation from theoretical values
    df2 = df.sort_values(by=['NormalisedScore','Plausibility','MassDeviation'], ascending = [False, True, True])
    correlation_new=df2.to_numpy()
    # add index number to fragments
    # different index numbers refer to different pairs; further interpretations
    # of the same pair get an empty index
    plus_index =[]
    number=1
    for row in range(len(correlation_new)):
        if row < 1:
            plus_index.append(np.insert(correlation_new[row],0,number))
        elif correlation_new[row][1]==correlation_new[row-1][1] and correlation_new[row][2]==correlation_new[row-1][2]:
            plus_index.append(np.insert(correlation_new[row],0,''))
        else:
            number = number+1
            plus_index.append(np.insert(correlation_new[row],0,number))
    df=pd.DataFrame(plus_index, columns=['Index', 'interpretation 1','m/z 1','m/z 2','interpretation 2','score',
                                         'normalised score','Plausibility','MassDeviation'])
    # drop repeated rows according to columns m/z 1 & m/z 2 (delete those with more than 1 interpretation)
    df=df.drop_duplicates(subset=['m/z 1', 'm/z 2'], keep="first").reset_index(drop=True)
    df['m/z 1']=df['m/z 1'].astype(float)
    df['m/z 2']=df['m/z 2'].astype(float)
    # 1-based rank of the first row without interpretation (the index was reset above)
    unidentified_index = next((j+1 for j, row in df.iterrows() if row['interpretation 1'] == ''), None)
    if unidentified_index is None:
        # Every row is interpreted: place the first uninterpreted rank just past
        # the end of the table instead of failing on `None // 10`.
        unidentified_index = len(df) + 1

    top_number = (unidentified_index // 10) * 10

    scan_low = pC2DMS.Scan(os.path.abspath(os.path.join(scan_dir, os.pardir)) + '/' + str(1000) + ' scans/')
    top_feat_low = pC2DMS.PCovMap(scan_low, scan_low.weightFunc(weights)).sampleFeats(indexList)
    new_top_feat_low = top_feat_low[np.abs(top_feat_low[:, 0] - top_feat_low[:, 1]) >= 5.5]
    new_top_feat_low = new_top_feat_low[np.flip(new_top_feat_low[:, 3].argsort())]
    # NOTE(review): this compares the unfiltered `topfeat` and uses top_number+1,
    # whereas loss_func6 compares `new_top_feat[:top_number]` - confirm which is intended.
    zero_pair = torch.tensor([0.0, 0.0], dtype=torch.float)  # loop-invariant reference
    matches = 0
    for z, j in itertools.product(topfeat[:top_number+1], new_top_feat_low[:top_number+1]):
        # same order: (A, B) vs (A, B)
        if torch.allclose(torch.tensor([z[0] - j[0], z[1] - j[1]], dtype=torch.float),
                          zero_pair, atol=tolerance_float):
            matches += 1
        # swapped order: (A, B) vs (B, A)
        if torch.allclose(torch.tensor([z[0] - j[1], z[1] - j[0]], dtype=torch.float),
                          zero_pair, atol=tolerance_float):
            matches += 1

    # NOTE(review): the message says 10000 scans while the comparison map above is
    # loaded from '1000 scans/' - confirm the wording.
    print('number of correlations included in ' + str(10000) + ' scans within top '+ 
           str(top_number)+ 'peaks: ' , matches)
    print('weights', weights)

    # (Persisting `weights` to <scan_dir>/weights.npy was disabled in the original code.)

    return -matches

In [9]:

def loss_func6(scan_dir, weights, indexList, numScans='all'):
    """
    Loss combining the rank of the first uninterpreted pair with a convergence speed.

    First the rank of the first uninterpreted pair is determined: the features
    sampled from the ``pC2DMS.PCovMap`` of ``scan_dir`` are cut at the diagonal
    (m/z difference < 5.5 removed), matched against ``<scan_dir>/sequence.npy``
    (both m/z values within 0.8, in either order), ranked by normalised score and
    reduced to one row per pair. That rank, rounded down to a multiple of ten,
    is ``top_number``. For the sibling directories '1000 scans/' and
    '9000 scans/' the agreements (within 0.8 on both m/z values, direct or
    swapped order) between the first ``top_number`` filtered features of this
    scan and of the sibling map are counted. The counts, followed by
    ``top_number`` itself, are passed to ``calculate_speed_of_convergence`` and
    the result is divided by ``top_number``.

    Args:
        scan_dir: Directory of the scan; must contain 'sequence.npy' and have the
            sibling directories '1000 scans/' and '9000 scans/'.
        weights: Passed to the ``weightFunc`` of every scan.
        indexList: Passed to ``sampleFeats`` of every map.
        numScans: Passed to ``pC2DMS.PCovMap`` for ``scan_dir`` (default 'all').

    Returns:
        ``-log(unidentified_index) - log(speed_of_convergence)``. ``np.inf`` is
        returned when ``top_number`` is 0 or the speed is not positive, because
        the expression is undefined (NaN) or infinite in those cases.
    """
    print(scan_dir)
    scan1 = pC2DMS.Scan(scan_dir)
    cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans=numScans)
    topfeat = cmap.sampleFeats(indexList)
    tolerance = 0.8
    tolerance_float = torch.tensor(tolerance, dtype=torch.float)
    # Cut off diagonal with 5.5 Da
    new_top_feat = []
    for i in topfeat:
        if abs(torch.tensor(i[0] - i[1])) >= 5.5:
            new_top_feat.append(i)
    new_top_feat = np.array(new_top_feat, dtype=object)
    # NOTE(review): allow_pickle=True unpickles the file; only load sequence.npy
    # from a trusted source.
    sequence = np.load(os.path.join(scan_dir, 'sequence.npy'), allow_pickle=True)
    correlation = []
    # compare the measured fragments pairs to theoretical values list
    # if the difference is smaller than or equal to the tolerance,
    # the array created will record this pair
    for i in new_top_feat:
        for j in sequence:
            if abs(torch.tensor(i[0]-j[2])) <= tolerance and abs(torch.tensor(i[1]-j[1])) <= tolerance:
                mass_deviation = np.sqrt((i[0]-j[2])**2+(i[1]-j[1])**2)
                correlation.append([str(j[3]), i[0], i[1], str(j[0]), i[2], i[3], j[4], mass_deviation.round(3)])
            elif abs(torch.tensor(i[0]-j[1])) <= tolerance and abs(torch.tensor(i[1]-j[2])) <= tolerance:
                mass_deviation = np.sqrt((i[0]-j[1])**2+(i[1]-j[2])**2)
                correlation.append([str(j[0]), i[0], i[1], str(j[3]), i[2], i[3], j[4], mass_deviation.round(3)])
    correlation = np.array(correlation)
    # Without any match np.array([]) is 1-D and the column slice would raise
    # IndexError, so fall back to an empty list of matched pairs.
    if correlation.size:
        correlation_list = correlation[:,1:3].astype(float).tolist()
    else:
        correlation_list = []
    # if there is no match with the theoretical values, will record the pair as unidentified fragments
    unidentify = []
    for item in new_top_feat.tolist():
        if item[:2] not in correlation_list:
            unidentify.append(['',item[0],item[1],'',item[2],item[3],'',''])
    correlation_new = correlation.tolist()+unidentify
    correlation_new = np.array(correlation_new)
    # sort the fragments according to their normalised correlation scores
    mapp = np.array([float(x) for x in correlation_new[:, 5]])
    correlation_new = correlation_new[np.flip(mapp.argsort())]

    df = pd.DataFrame(correlation_new, columns=['Interpretation A','m/z A', 'm/z B', 'Interpretation B',
                     'CorrelationScore', 'NormalisedScore', 'Plausibility','MassDeviation'])
    df.NormalisedScore = df.NormalisedScore.astype(float)
    # sort the fragments according to normalisation scores and then to their interpretation plausibility and then to
    # their mass deviation from theoretical values
    df2 = df.sort_values(by=['NormalisedScore','Plausibility','MassDeviation'], ascending = [False, True, True])
    correlation_new=df2.to_numpy()
    # add index number to fragments
    # different index numbers refer to different pairs; further interpretations
    # of the same pair get an empty index
    plus_index =[]
    number=1
    for row in range(len(correlation_new)):
        if row < 1:
            plus_index.append(np.insert(correlation_new[row],0,number))
        elif correlation_new[row][1]==correlation_new[row-1][1] and correlation_new[row][2]==correlation_new[row-1][2]:
            plus_index.append(np.insert(correlation_new[row],0,''))
        else:
            number = number+1
            plus_index.append(np.insert(correlation_new[row],0,number))
    df=pd.DataFrame(plus_index, columns=['Index', 'interpretation 1','m/z 1','m/z 2','interpretation 2','score',
                                         'normalised score','Plausibility','MassDeviation'])
    # drop repeated rows according to columns m/z 1 & m/z 2 (delete those with more than 1 interpretation)
    df=df.drop_duplicates(subset=['m/z 1', 'm/z 2'], keep="first").reset_index(drop=True)
    df['m/z 1']=df['m/z 1'].astype(float)
    df['m/z 2']=df['m/z 2'].astype(float)
    # 1-based rank of the first row without interpretation (the index was reset above)
    unidentified_index = next((j+1 for j, row in df.iterrows() if row['interpretation 1'] == ''), None)
    if unidentified_index is None:
        # Every row is interpreted: place the first uninterpreted rank just past
        # the end of the table instead of failing on `None // 10`.
        unidentified_index = len(df) + 1
    print('unidentified_index: ', unidentified_index)

    top_number = (unidentified_index // 10) * 10
    if top_number == 0:
        # Fewer than ten rows before the first uninterpreted pair: the speed is
        # divided by top_number below, which would give NaN.
        print('weights', weights)
        return np.inf

    numscan_list = [1000, 9000, 10000]
    num_array = []
    zero_pair = torch.tensor([0.0, 0.0], dtype=torch.float)  # loop-invariant reference

    # The last entry of numscan_list is represented by top_number (appended after the loop).
    for i in numscan_list[:-1]:
        scan_low = pC2DMS.Scan(os.path.abspath(os.path.join(scan_dir, os.pardir)) + '/' + str(i) + ' scans/')
        top_feat_low = pC2DMS.PCovMap(scan_low, scan_low.weightFunc(weights)).sampleFeats(indexList)
        new_top_feat_low = top_feat_low[np.abs(top_feat_low[:, 0] - top_feat_low[:, 1]) >= 5.5]
        new_top_feat_low = new_top_feat_low[np.flip(new_top_feat_low[:, 3].argsort())]
        matches = 0
        for z, j in itertools.product(new_top_feat[:top_number], new_top_feat_low[:top_number]):
            # same order: (A, B) vs (A, B)
            if torch.allclose(torch.tensor([z[0] - j[0], z[1] - j[1]], dtype=torch.float),
                              zero_pair, atol=tolerance_float):
                matches += 1
            # swapped order: (A, B) vs (B, A)
            if torch.allclose(torch.tensor([z[0] - j[1], z[1] - j[0]], dtype=torch.float),
                              zero_pair, atol=tolerance_float):
                matches += 1
        num_array.append(matches)
        # Release the per-scan arrays before the next map is built.
        del top_feat_low, new_top_feat_low
        gc.collect()
    num_array.append(top_number)

    print('num_array', num_array)
    print('weights', weights)

    # (Persisting `weights` to <scan_dir>/weights.npy was disabled in the original code.)

    num_array = np.array(num_array)
    speed_of_convergence = calculate_speed_of_convergence(num_array) / top_number

    gc.collect()

    if not speed_of_convergence > 0:
        # log() of zero or of a negative speed is -inf / NaN; report the worst loss.
        return np.inf

    return - np.log(unidentified_index) - np.log(speed_of_convergence)

In [10]:

def loss_func7(scan_dir, weights, indexList, numScans='all'):
    """Loss built from identification rank, outlier rank, objective score and convergence speed.

    The features sampled at ``indexList`` from the weighted covariance map of
    ``scan_dir`` are matched (within a tolerance of 0.8 on both m/z values)
    against the theoretical fragment pairs stored in ``sequence.npy``, ranked by
    normalised score, and compared with the maps built from the sibling
    ``'<n> scans/'`` directories (n = 1000 ... 9000).

    Parameters
    ----------
    scan_dir
        Directory handed to ``pC2DMS.Scan``. It must contain ``sequence.npy``
        and its parent must contain the ``'<n> scans/'`` directories.
    weights
        Passed to ``Scan.weightFunc`` for every map that is built.
    indexList
        Passed to ``PCovMap.sampleFeats`` for every map that is built.
    numScans
        Forwarded to ``PCovMap`` for the map of ``scan_dir`` only.

    Returns
    -------
    ``-log(outlier_index) - log(unidentified_index) - 0.1 * log(score)
    - log(speed_of_convergence)``; when per-feature score histories are
    available, ``log`` of their mean convergence speed is subtracted as well.
    """
    print(scan_dir)
    scan1 = pC2DMS.Scan(scan_dir)
    cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans=numScans)
    topfeat = cmap.sampleFeats(indexList)
    tolerance = 0.8
    tolerance_float = torch.tensor(tolerance, dtype=torch.float)
    # Cut off diagonal with 5.5 Da
    new_top_feat = []
    for i in topfeat:
        if abs(torch.tensor(i[0] - i[1])) >= 5.5:
            new_top_feat.append(i)
    new_top_feat = np.array(new_top_feat, dtype=object)
    # A pair whose two m/z values sum to more than this counts as an outlier.
    # NOTE(review): hard-coded for every data set - confirm it should not depend on the peptide.
    max_number = 1844.9996199999998
    # allow_pickle=True can execute code embedded in the file; only load trusted data.
    sequence = np.load(os.path.join(scan_dir, 'sequence.npy'), allow_pickle=True)
    correlation = []
    # compare the measured fragments pairs to theoretical values list
    # if the difference is smaller than or equal to the tolerance,
    # the array created will record this pair
    for i in new_top_feat:
        for j in sequence:
            if abs(torch.tensor(i[0]-j[2])) <= tolerance and abs(torch.tensor(i[1]-j[1])) <= tolerance:
                mass_deviation = np.sqrt((i[0]-j[2])**2+(i[1]-j[1])**2)
                correlation.append([str(j[3]), i[0], i[1], str(j[0]), i[2], i[3], j[4], mass_deviation.round(3)])
            elif abs(torch.tensor(i[0]-j[1])) <= tolerance and abs(torch.tensor(i[1]-j[2])) <= tolerance:
                mass_deviation = np.sqrt((i[0]-j[1])**2+(i[1]-j[2])**2)
                correlation.append([str(j[0]), i[0], i[1], str(j[3]), i[2], i[3], j[4], mass_deviation.round(3)])
    correlation = np.array(correlation)
    # Without a single match `correlation` is an empty 1-D array and the
    # 2-D slice below would raise IndexError, so treat that case explicitly.
    if len(correlation) > 0:
        correlation_list = correlation[:,1:3].astype(float).tolist()
    else:
        correlation_list = []
    # if there is no match with the theoretical values, will record the pair as unidentified fragments
    unidentify = []
    for item in new_top_feat.tolist():
        if item[:2] not in correlation_list:
            unidentify.append(['',item[0],item[1],'',item[2],item[3],'',''])
    correlation_new = correlation.tolist()+unidentify
    correlation_new = np.array(correlation_new)
    # sort the fragments according to their normalised correlation scores
    mapp = np.array([float(x) for x in correlation_new[:, 5]])
    correlation_new = correlation_new[np.flip(mapp.argsort())]

    df = pd.DataFrame(correlation_new, columns=['Interpretation A','m/z A', 'm/z B', 'Interpretation B',
                     'CorrelationScore', 'NormalisedScore', 'Plausibility','MassDeviation'])
    df.NormalisedScore = df.NormalisedScore.astype(float)
    # sort the fragments according to normalisation scores and then to their interpretation plausibility and then to
    # their mass deviation from theoretical values
    df2 = df.sort_values(by=['NormalisedScore','Plausibility','MassDeviation'], ascending = [False, True, True])
    correlation_new=df2.to_numpy()
    # add index number to fragments
    # different index numbers refer to different pairs
    plus_index =[]
    number=1
    # Plain range: numba's prange only parallelises inside jit-compiled code.
    for row in range(len(correlation_new)):
        if row < 1:
            plus_index.append(np.insert(correlation_new[row],0,number))
        elif row >= 1:
            if correlation_new[row][1]==correlation_new[row-1][1] and correlation_new[row][2]==correlation_new[row-1][2]:
                # same pair as the previous row (another interpretation): no new index
                plus_index.append(np.insert(correlation_new[row],0,''))

            else:
                number = number+1
                plus_index.append(np.insert(correlation_new[row],0,number))
    df=pd.DataFrame(plus_index, columns=['Index', 'interpretation 1','m/z 1','m/z 2','interpretation 2','score',
                                         'normalised score','Plausibility','MassDeviation'])
    # drop repeated rows according to columns m/z 1 & m/z 2 (delete those ones with more than 1 interpretation)
    df=df.drop_duplicates(subset=['m/z 1', 'm/z 2'], keep="first").reset_index(drop=True)
    df['m/z 1']=df['m/z 1'].astype(float)
    df['m/z 2']=df['m/z 2'].astype(float)
    score = objective_function(df, 100)
    # 1-based rank of the first outlier / first unidentified pair. When there is
    # none, fall back to the rank just past the last row instead of None, which
    # would make the integer division and np.log below raise TypeError.
    rank_past_end = len(df) + 1
    outlier_index = next((j+1 for j, row in df.iterrows() if row['m/z 1'] + row['m/z 2'] > max_number), rank_past_end)
    unidentified_index = next((j+1 for j, row in df.iterrows() if row['interpretation 1'] == ''), rank_past_end)
    print('outlier_index: ', outlier_index)
    print('unidentified_index: ', unidentified_index)

    # Number of leading features compared between maps, rounded down to a multiple of 10.
    # NOTE(review): this is 0 whenever unidentified_index < 10, which makes the
    # normalisation of speed_of_convergence below a division by zero - confirm the intended handling.
    top_number = (unidentified_index // 10) * 10

    numscan_list = [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000]
    num_array = []
    new_top_feat_lows = []

    for i in numscan_list[:-1]:
        scan_low = pC2DMS.Scan(os.path.abspath(os.path.join(scan_dir, os.pardir)) + '/' + str(i) + ' scans/')
        top_feat_low = pC2DMS.PCovMap(scan_low, scan_low.weightFunc(weights)).sampleFeats(indexList)
        new_top_feat_low = top_feat_low[np.abs(top_feat_low[:, 0] - top_feat_low[:, 1]) >= 5.5]
        new_top_feat_low = new_top_feat_low[np.flip(new_top_feat_low[:, 3].argsort())]
        new_top_feat_lows.append(new_top_feat_low)
        # count the leading pairs of the full map that reappear (in either m/z order) in this map
        count = []
        for z, j in itertools.product(new_top_feat[:top_number], new_top_feat_low[:top_number]):
            if torch.allclose(torch.tensor([z[0] - j[0], z[1] - j[1]], dtype=torch.float),
                              torch.tensor([0.0, 0.0], dtype=torch.float), atol=tolerance_float):
                count.append(1)
            if torch.allclose(torch.tensor([z[0] - j[1], z[1] - j[0]], dtype=torch.float),
                              torch.tensor([0.0, 0.0], dtype=torch.float), atol=tolerance_float):
                count.append(1)
        num_array.append(len(count))
        del top_feat_low, new_top_feat_low, count
        gc.collect()
    # the full map trivially agrees with itself on all top_number features
    num_array.append(top_number)
    new_top_feat_lows.append(new_top_feat)

    print('num_array', num_array)
    print('weights', weights)

    # Number of leading indexed pairs whose score history is tracked individually.
    score_top_number = 16
    top_score = np.array([
        [j[0], j[1], j[2], float(j[3]), j[4], j[6]]
        for j in plus_index
        for i in range(score_top_number)
        if j[0] == i + 1 and j[1] != ''
    ])

    new_top_feat_lows = np.array(new_top_feat_lows, dtype=object)

    whole_score_list = calculate_whole_score_list(numscan_list, top_score, new_top_feat_lows, tolerance)

    speed_of_convergence_list = []
    for a in whole_score_list:
        if len(a) > 0:
            denominator = a[-1, 2] - a[0, 2]
            if denominator == 0:
                speed_of_convergence_list.append(calculate_speed_of_convergence(a[:, 2]) / 1000)
            else:
                speed_of_convergence_list.append(calculate_speed_of_convergence(a[:, 2]) / denominator)
    speed_of_convergence_list = np.array(speed_of_convergence_list)

    num_array = np.array(num_array)
    speed_of_convergence = calculate_speed_of_convergence(np.array(num_array)) / top_number

    gc.collect()

    # Build the loss once so the printed and the returned value cannot drift apart.
    loss = - np.log(outlier_index) - np.log(unidentified_index) - 0.1 * np.log(score) - np.log(speed_of_convergence)
    if len(speed_of_convergence_list) != 0:
        mean_speed_of_convergence = np.mean(speed_of_convergence_list)
        loss = loss - np.log(mean_speed_of_convergence)
    print("Loss: ", loss)
    return loss

In [11]:

def loss_func8(scan_dir, weights, indexList, numScans='all'):
    """Loss equal to the negated, normalised convergence speed of the top features.

    The features sampled at ``indexList`` from the weighted covariance map of
    ``scan_dir`` are matched (within a tolerance of 0.8 on both m/z values)
    against the theoretical fragment pairs stored in ``sequence.npy`` and ranked
    by normalised score. The leading features are then compared with the maps
    built from the sibling ``'1000 scans/'`` and ``'9000 scans/'`` directories.

    Parameters
    ----------
    scan_dir
        Directory handed to ``pC2DMS.Scan``. It must contain ``sequence.npy``
        and its parent must contain the ``'<n> scans/'`` directories.
    weights
        Passed to ``Scan.weightFunc`` for every map that is built.
    indexList
        Passed to ``PCovMap.sampleFeats`` for every map that is built.
    numScans
        Forwarded to ``PCovMap`` for the map of ``scan_dir`` only.

    Returns
    -------
    ``-calculate_speed_of_convergence(num_array) / top_number`` where
    ``num_array`` holds the per-map counts of re-found leading features.
    """
    print(scan_dir)
    scan1 = pC2DMS.Scan(scan_dir)
    cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans=numScans)
    topfeat = cmap.sampleFeats(indexList)
    tolerance = 0.8
    tolerance_float = torch.tensor(tolerance, dtype=torch.float)
    # Cut off diagonal with 5.5 Da
    new_top_feat = []
    for i in topfeat:
        if abs(torch.tensor(i[0] - i[1])) >= 5.5:
            new_top_feat.append(i)
    new_top_feat = np.array(new_top_feat, dtype=object)
    # allow_pickle=True can execute code embedded in the file; only load trusted data.
    sequence = np.load(os.path.join(scan_dir, 'sequence.npy'), allow_pickle=True)
    correlation = []
    # compare the measured fragments pairs to theoretical values list
    # if the difference is smaller than or equal to the tolerance,
    # the array created will record this pair
    for i in new_top_feat:
        for j in sequence:
            if abs(torch.tensor(i[0]-j[2])) <= tolerance and abs(torch.tensor(i[1]-j[1])) <= tolerance:
                mass_deviation = np.sqrt((i[0]-j[2])**2+(i[1]-j[1])**2)
                correlation.append([str(j[3]), i[0], i[1], str(j[0]), i[2], i[3], j[4], mass_deviation.round(3)])
            elif abs(torch.tensor(i[0]-j[1])) <= tolerance and abs(torch.tensor(i[1]-j[2])) <= tolerance:
                mass_deviation = np.sqrt((i[0]-j[1])**2+(i[1]-j[2])**2)
                correlation.append([str(j[0]), i[0], i[1], str(j[3]), i[2], i[3], j[4], mass_deviation.round(3)])
    correlation = np.array(correlation)
    # Without a single match `correlation` is an empty 1-D array and the
    # 2-D slice below would raise IndexError, so treat that case explicitly.
    if len(correlation) > 0:
        correlation_list = correlation[:,1:3].astype(float).tolist()
    else:
        correlation_list = []
    # if there is no match with the theoretical values, will record the pair as unidentified fragments
    unidentify = []
    for item in new_top_feat.tolist():
        if item[:2] not in correlation_list:
            unidentify.append(['',item[0],item[1],'',item[2],item[3],'',''])
    correlation_new = correlation.tolist()+unidentify
    correlation_new = np.array(correlation_new)
    # sort the fragments according to their normalised correlation scores
    mapp = np.array([float(x) for x in correlation_new[:, 5]])
    correlation_new = correlation_new[np.flip(mapp.argsort())]

    df = pd.DataFrame(correlation_new, columns=['Interpretation A','m/z A', 'm/z B', 'Interpretation B',
                     'CorrelationScore', 'NormalisedScore', 'Plausibility','MassDeviation'])
    df.NormalisedScore = df.NormalisedScore.astype(float)
    # sort the fragments according to normalisation scores and then to their interpretation plausibility and then to
    # their mass deviation from theoretical values
    df2 = df.sort_values(by=['NormalisedScore','Plausibility','MassDeviation'], ascending = [False, True, True])
    correlation_new=df2.to_numpy()
    # add index number to fragments
    # different index numbers refer to different pairs
    plus_index =[]
    number=1
    # Plain range: numba's prange only parallelises inside jit-compiled code.
    for row in range(len(correlation_new)):
        if row < 1:
            plus_index.append(np.insert(correlation_new[row],0,number))
        elif row >= 1:
            if correlation_new[row][1]==correlation_new[row-1][1] and correlation_new[row][2]==correlation_new[row-1][2]:
                # same pair as the previous row (another interpretation): no new index
                plus_index.append(np.insert(correlation_new[row],0,''))

            else:
                number = number+1
                plus_index.append(np.insert(correlation_new[row],0,number))
    df=pd.DataFrame(plus_index, columns=['Index', 'interpretation 1','m/z 1','m/z 2','interpretation 2','score',
                                         'normalised score','Plausibility','MassDeviation'])
    # drop repeated rows according to columns m/z 1 & m/z 2 (delete those ones with more than 1 interpretation)
    df=df.drop_duplicates(subset=['m/z 1', 'm/z 2'], keep="first").reset_index(drop=True)
    df['m/z 1']=df['m/z 1'].astype(float)
    df['m/z 2']=df['m/z 2'].astype(float)
    # 1-based rank of the first unidentified pair. When every pair is identified,
    # fall back to the rank just past the last row instead of None, which would
    # make the integer division below raise TypeError.
    unidentified_index = next((j+1 for j, row in df.iterrows() if row['interpretation 1'] == ''), len(df) + 1)

    # Number of leading features compared between maps, rounded down to a multiple of 10.
    # NOTE(review): this is 0 whenever unidentified_index < 10, which makes the
    # final normalisation a division by zero - confirm the intended handling.
    top_number = (unidentified_index // 10) * 10

    numscan_list = [1000, 9000, 10000]
    num_array = []

    for i in numscan_list[:-1]:
        scan_low = pC2DMS.Scan(os.path.abspath(os.path.join(scan_dir, os.pardir)) + '/' + str(i) + ' scans/')
        top_feat_low = pC2DMS.PCovMap(scan_low, scan_low.weightFunc(weights)).sampleFeats(indexList)
        new_top_feat_low = top_feat_low[np.abs(top_feat_low[:, 0] - top_feat_low[:, 1]) >= 5.5]
        new_top_feat_low = new_top_feat_low[np.flip(new_top_feat_low[:, 3].argsort())]
        # count the leading pairs of the full map that reappear (in either m/z order) in this map
        count = []
        for z, j in itertools.product(new_top_feat[:top_number], new_top_feat_low[:top_number]):
            if torch.allclose(torch.tensor([z[0] - j[0], z[1] - j[1]], dtype=torch.float),
                              torch.tensor([0.0, 0.0], dtype=torch.float), atol=tolerance_float):
                count.append(1)
            if torch.allclose(torch.tensor([z[0] - j[1], z[1] - j[0]], dtype=torch.float),
                              torch.tensor([0.0, 0.0], dtype=torch.float), atol=tolerance_float):
                count.append(1)
        num_array.append(len(count))
        del top_feat_low, new_top_feat_low, count
        gc.collect()
    # the full map trivially agrees with itself on all top_number features
    num_array.append(top_number)


    print('num_array', num_array)
    print('weights', weights)

    # Disabled: persist the evaluated weights next to the scan data.
    # if os.path.exists(os.path.join(scan_dir, 'weights_da.npy')):
    #     os.remove(os.path.join(scan_dir, 'weights_da.npy'))
    # np.save(os.path.join(scan_dir, 'weights_da.npy'), weights)

    speed_of_convergence = calculate_speed_of_convergence(np.array(num_array)) / top_number

    gc.collect()

    return -speed_of_convergence

In [12]:

def loss_func9(scan_dir, weights, indexList, numScans='all'):
    """Loss equal to the scan count (in thousands) at which the top features agree with the full map.

    The features sampled at ``indexList`` from the weighted covariance map of
    ``scan_dir`` are matched (within a tolerance of 0.8 on both m/z values)
    against the theoretical fragment pairs stored in ``sequence.npy`` and ranked
    by normalised score. The leading features are then compared with the maps
    built from the sibling ``'<n> scans/'`` directories (n = 1000 ... 9000).

    Parameters
    ----------
    scan_dir
        Directory handed to ``pC2DMS.Scan``. It must contain ``sequence.npy``
        and its parent must contain the ``'<n> scans/'`` directories.
    weights
        Passed to ``Scan.weightFunc`` for every map that is built.
    indexList
        Passed to ``PCovMap.sampleFeats`` for every map that is built.
    numScans
        Forwarded to ``PCovMap`` for the map of ``scan_dir`` only.

    Returns
    -------
    ``converge_index``: 10 by default, otherwise ``n // 1000`` for the last
    scan count ``n`` whose number of re-found leading features is within
    ``unidentified_index // 10`` of ``top_number``.
    """
    print(scan_dir)
    scan1 = pC2DMS.Scan(scan_dir)
    cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans=numScans)
    topfeat = cmap.sampleFeats(indexList)
    tolerance = 0.8
    tolerance_float = torch.tensor(tolerance, dtype=torch.float)
    # Cut off diagonal with 5.5 Da
    new_top_feat = []
    for i in topfeat:
        if abs(torch.tensor(i[0] - i[1])) >= 5.5:
            new_top_feat.append(i)
    new_top_feat = np.array(new_top_feat, dtype=object)
    # allow_pickle=True can execute code embedded in the file; only load trusted data.
    sequence = np.load(os.path.join(scan_dir, 'sequence.npy'), allow_pickle=True)
    correlation = []
    # compare the measured fragments pairs to theoretical values list
    # if the difference is smaller than or equal to the tolerance,
    # the array created will record this pair
    for i in new_top_feat:
        for j in sequence:
            if abs(torch.tensor(i[0]-j[2])) <= tolerance and abs(torch.tensor(i[1]-j[1])) <= tolerance:
                mass_deviation = np.sqrt((i[0]-j[2])**2+(i[1]-j[1])**2)
                correlation.append([str(j[3]), i[0], i[1], str(j[0]), i[2], i[3], j[4], mass_deviation.round(3)])
            elif abs(torch.tensor(i[0]-j[1])) <= tolerance and abs(torch.tensor(i[1]-j[2])) <= tolerance:
                mass_deviation = np.sqrt((i[0]-j[1])**2+(i[1]-j[2])**2)
                correlation.append([str(j[0]), i[0], i[1], str(j[3]), i[2], i[3], j[4], mass_deviation.round(3)])
    correlation = np.array(correlation)
    # Without a single match `correlation` is an empty 1-D array and the
    # 2-D slice below would raise IndexError, so treat that case explicitly.
    if len(correlation) > 0:
        correlation_list = correlation[:,1:3].astype(float).tolist()
    else:
        correlation_list = []
    # if there is no match with the theoretical values, will record the pair as unidentified fragments
    unidentify = []
    for item in new_top_feat.tolist():
        if item[:2] not in correlation_list:
            unidentify.append(['',item[0],item[1],'',item[2],item[3],'',''])
    correlation_new = correlation.tolist()+unidentify
    correlation_new = np.array(correlation_new)
    # sort the fragments according to their normalised correlation scores
    mapp = np.array([float(x) for x in correlation_new[:, 5]])
    correlation_new = correlation_new[np.flip(mapp.argsort())]

    df = pd.DataFrame(correlation_new, columns=['Interpretation A','m/z A', 'm/z B', 'Interpretation B',
                     'CorrelationScore', 'NormalisedScore', 'Plausibility','MassDeviation'])
    df.NormalisedScore = df.NormalisedScore.astype(float)
    # sort the fragments according to normalisation scores and then to their interpretation plausibility and then to
    # their mass deviation from theoretical values
    df2 = df.sort_values(by=['NormalisedScore','Plausibility','MassDeviation'], ascending = [False, True, True])
    correlation_new=df2.to_numpy()
    # add index number to fragments
    # different index numbers refer to different pairs
    plus_index =[]
    number=1
    # Plain range: numba's prange only parallelises inside jit-compiled code.
    for row in range(len(correlation_new)):
        if row < 1:
            plus_index.append(np.insert(correlation_new[row],0,number))
        elif row >= 1:
            if correlation_new[row][1]==correlation_new[row-1][1] and correlation_new[row][2]==correlation_new[row-1][2]:
                # same pair as the previous row (another interpretation): no new index
                plus_index.append(np.insert(correlation_new[row],0,''))

            else:
                number = number+1
                plus_index.append(np.insert(correlation_new[row],0,number))
    df=pd.DataFrame(plus_index, columns=['Index', 'interpretation 1','m/z 1','m/z 2','interpretation 2','score',
                                         'normalised score','Plausibility','MassDeviation'])
    # drop repeated rows according to columns m/z 1 & m/z 2 (delete those ones with more than 1 interpretation)
    df=df.drop_duplicates(subset=['m/z 1', 'm/z 2'], keep="first").reset_index(drop=True)
    df['m/z 1']=df['m/z 1'].astype(float)
    df['m/z 2']=df['m/z 2'].astype(float)
    # 1-based rank of the first unidentified pair. When every pair is identified,
    # fall back to the rank just past the last row instead of None, which would
    # make the integer division below raise TypeError.
    unidentified_index = next((j+1 for j, row in df.iterrows() if row['interpretation 1'] == ''), len(df) + 1)

    # Number of leading features compared between maps, rounded down to a multiple of 10.
    top_number = (unidentified_index // 10) * 10

    numscan_list = [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000]

    converge_index = 10
    # NOTE(review): the loop never breaks, so converge_index ends up as the
    # LARGEST scan count (in thousands) that meets the criterion, not the first
    # one - confirm this is the intended definition of convergence.
    for i in numscan_list[:-1]:
        scan_low = pC2DMS.Scan(os.path.abspath(os.path.join(scan_dir, os.pardir)) + '/' + str(i) + ' scans/')
        top_feat_low = pC2DMS.PCovMap(scan_low, scan_low.weightFunc(weights)).sampleFeats(indexList)
        new_top_feat_low = top_feat_low[np.abs(top_feat_low[:, 0] - top_feat_low[:, 1]) >= 5.5]
        new_top_feat_low = new_top_feat_low[np.flip(new_top_feat_low[:, 3].argsort())]
        # count the leading pairs of the full map that reappear (in either m/z order) in this map
        count = []
        for z, j in itertools.product(new_top_feat[:top_number], new_top_feat_low[:top_number]):
            if torch.allclose(torch.tensor([z[0] - j[0], z[1] - j[1]], dtype=torch.float),
                              torch.tensor([0.0, 0.0], dtype=torch.float), atol=tolerance_float):
                count.append(1)
            if torch.allclose(torch.tensor([z[0] - j[1], z[1] - j[0]], dtype=torch.float),
                              torch.tensor([0.0, 0.0], dtype=torch.float), atol=tolerance_float):
                count.append(1)
        if abs(torch.tensor(len(count) - top_number)) <= unidentified_index // 10:
            converge_index = i // 1000

    print('converge_index: ', converge_index)
    print('weights: ', weights)

    # Disabled: persist the evaluated weights next to the scan data.
    # if os.path.exists(os.path.join(scan_dir, 'weights.npy')):
    #     os.remove(os.path.join(scan_dir, 'weights.npy'))
    # np.save(os.path.join(scan_dir, 'weights.npy'), weights)

    return converge_index

In [13]:

from typing import Tuple
import pC2DMS

def optimize_weights(dirname, filename, loss_function, first_time=False, last_time=False, opti_range=[23,100]):
    numscan_list = [int(os.path.splitext(d.name)[0].split(' ')[0]) for d in os.scandir(os.path.join('./peptide output', dirname, filename)) if d.is_dir() and len(os.path.splitext(d.name)[0].split(' ')) == 2]
    numscan_list.sort()
    scan_number = numscan_list[-1]
    scan_dir = os.path.join('./peptide output', dirname, filename, f'{numscan_list[-1]} scans')
    weights=None
    scan1 = pC2DMS.Scan(scan_dir)
    if os.path.exists('weights_bayopt.npy'):
        weights = np.load('weights_bayopt.npy')
    if os.path.exists(os.path.join(scan_dir, 'top_indices_tic.npy')):
        indexlist = np.load(os.path.join(scan_dir, 'top_indices_tic.npy'))
    else:
        cmap = pC2DMS.PCovMap(scan1, scan1.tic(), numScans=scan_number)
        indexlist = cmap.topNfeats(3000)
        topfeat = cmap.sampleFeatsIndex(indexlist)
        topfeat_sorted = topfeat[np.flip(topfeat[:, 3].argsort())]
        np.save(os.path.join(scan_dir, 'top_indices_tic.npy'), topfeat_sorted)
    #     new_template = np.copy(cmap.array)
    #     new_template.fill(0)
        np.savetxt(os.path.join(scan_dir, 'top_indices_tic.csv'), topfeat_sorted, fmt = '%.2f', delimiter=',')
    if first_time == True or last_time == True:
        sub_start = timeit.default_timer()
        '''logging.info(f"Computing initial PCov for {filename}")
        sys.argv=[sys.argv[0], '--weight', weights, '--dirname', dirname, '--name', filename]
        %run torch.ipynb'''
        logging.info(f"Computing initial loss for {filename}")
        sys.argv=[sys.argv[0], '--weight', weights, '--dirname', dirname, '--name', filename, 
                  '--parpath', os.path.join('./peptide output', dirname, filename), '--mode', 'w']
        %run report.ipynb
        loss = np.load(os.path.join('./peptide output', dirname, filename, f'{scan_number} scans/some_results.npy'), allow_pickle=True)[1:]
        logging.info(f"The running time of {filename}: {timeit.default_timer() - sub_start}")
        loss = loss[0] # * loss[1] * loss[2]
        logging.info(f"Weights: {weights}")
        logging.info(f"Loss: {loss}")

    if not last_time:
        logging.info(f"Optimizing weights for {filename}")
        sub_start = timeit.default_timer()
        # for epoch in np.arange(3):
        if weights is None:
            weights, loss = scan1.optimizeWeights(indexList=indexlist[opti_range[0]:opti_range[1]], loss_function=loss_function, scan_dir=scan_dir, weights=weights) 
        else:
            weights, loss = scan1.optimizeWeights(indexList=indexlist[opti_range[0]:opti_range[1]], loss_function=loss_function, scan_dir=scan_dir, weights=weights) 

        logging.info(f"Weights: {weights}")
        logging.info(f"Loss: {loss}")
        '''if os.path.exists('weights_bayopt.npy'):
            os.remove('weights_bayopt.npy')
        np.save('weights_bayopt.npy', weights)'''
        '''if len(weights) < scan1.scanList.shape[1] // (100 * (3-epoch)):
                weights = np.repeat(weights, (3-epoch))'''
        logging.info(f"The running time of {filename}: {timeit.default_timer() - sub_start}")
    return weights, loss

In [14]:
# Map every entry of the raw-data folder to the extension-less names of the
# files it contains.
raw_data_root = '../IC/IC/raw data/'
datafile_dict = {}

for dirname in os.listdir(raw_data_root):
    file_stems = datafile_dict.setdefault(dirname, [])
    for filename in os.listdir(raw_data_root + str(dirname)):
        stem, _extension = os.path.splitext(filename)
        file_stems.append(stem)

In [15]:

# Result containers filled by the optimisation loop, keyed by data file name.
weights = {}
loss = {}

In [16]:
# Show the raw-data entries that were found. Their order matters: the
# optimisation cell selects one of them by position via list(datafile_dict)[2].
print(datafile_dict.keys())

dict_keys(['CID', '.ipynb_checkpoints', 'Test', 'HCD'])


In [None]:
# Data sets optimised in every pass, as (data file name, [start, stop] slice of
# the feature list handed to the optimiser). Comment an entry out to skip it.
optimization_runs = [
    ('20160428_2100_ME16_3+_CVscan_NCE35_Turbo', [66, 366]),
    # ('7255_2d-PC-MS_10pmol-ul_1AGC_0-7quadiso_CID', [23, 323]),
    # ('6727_2d-PC-MS_1pmol-ul_1AGC_0-7quadiso_HCD', [8, 308]),
    ('20160428_2222_ME16_2+_CVScan_NCE35_Turbo', [28, 328]),
    ('20160504_0930_ME4_3+_CVscan_NCE35_Turbo', [56, 356]),
    ('20160511_2003_ME17_2+_CVscan_Turbo', [40, 340]),
    ('20160602_1249_ME8_3+_CVscan_NCE35_Turbo', [48, 348]),
    ('20160603_1005_ME9_2+_CVscan_NCE35_Turbo', [23, 323]),
    ('20160603_1040_ME9_3+_CVscan_NCE35_Turbo', [34, 334]),
    ('20160622_1448_ME14_2+_1to2500_CVscan_NCE35', [33, 333]),
    ('20160622_1514_ME14_3+_1to2500_CVscan_NCE35', [55, 355]),
    # ('20160629_1738_ME15_3+_CVscan_NCE35', [27, 327]),
    ('20160708_1747_UN14_2+_0,01mM_CVscan_Turbo', [27, 327]),
    ('PH8_2+_CVscan_NCE35_Turbo_20160505_1308', [20, 320]),
]
# One "epoch" is one optimize_weights call, so each pass advances the counter
# by the number of active runs; deriving it from the table keeps the banner
# correct when entries are enabled or disabled.
runs_per_pass = len(optimization_runs)

for i in range(0, 1000):
    print("epoch: " + str(i * runs_per_pass) + " to " + str(i * runs_per_pass + runs_per_pass))
    for run_name, run_range in optimization_runs:
        weights[run_name], loss[run_name] = optimize_weights(list(datafile_dict)[2], run_name, loss_function=loss_func2, opti_range=run_range)
    

2023-08-16 10:18:19,301 - root - INFO - Optimizing weights for 20160428_2100_ME16_3+_CVscan_NCE35_Turbo


epoch: 280 to 294
./peptide output/Test/20160428_2100_ME16_3+_CVscan_NCE35_Turbo/10000 scans
sig calculated for feature 50
sig calculated for feature 100
sig calculated for feature 150
sig calculated for feature 200
sig calculated for feature 250
sig calculated for feature 300
unidentified_index:  5
weights:  [2. 0. 0. 0. 2. 0. 2. 0. 2. 2. 0. 2. 0. 0. 2. 2. 2. 2. 0. 2. 2. 2. 2. 2.
 0. 0. 2. 0. 2. 2. 0. 2. 2. 0. 2. 0. 2. 2. 2.]
|   iter    |  target   |    w_0    |    w_1    |   w_10    |   w_11    |   w_12    |   w_13    |   w_14    |   w_15    |   w_16    |   w_17    |   w_18    |   w_19    |    w_2    |   w_20    |   w_21    |   w_22    |   w_23    |   w_24    |   w_25    |   w_26    |   w_27    |   w_28    |   w_29    |    w_3    |   w_30    |   w_31    |   w_32    |   w_33    |   w_34    |   w_35    |   w_36    |   w_37    |   w_38    |    w_4    |    w_5    |    w_6    |    w_7    |    w_8    |    w_9    |
---------------------------------------------------------------------------

2023-08-16 10:40:40,405 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 10:40:40,407 - root - INFO - Loss: -4.0
2023-08-16 10:40:40,407 - root - INFO - The running time of 20160428_2100_ME16_3+_CVscan_NCE35_Turbo: 1341.1053021512926
2023-08-16 10:40:40,518 - root - INFO - Optimizing weights for 20160428_2222_ME16_2+_CVScan_NCE35_Turbo


unidentified_index:  4
weights:  [2. 2. 0. 0. 0. 2. 2. 2. 2. 2. 0. 0. 0. 2. 0. 2. 2. 0. 2. 0. 2. 2. 0. 0.
 2. 2. 0. 2. 2. 0. 2. 0. 0. 0. 0. 0. 2. 2. 0.]
| [0m395      [0m | [0m4.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m |
./peptide output/Test/201

2023-08-16 11:00:42,149 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 11:00:42,150 - root - INFO - Loss: -4.0
2023-08-16 11:00:42,151 - root - INFO - The running time of 20160428_2222_ME16_2+_CVScan_NCE35_Turbo: 1201.6313564833254
2023-08-16 11:00:42,306 - root - INFO - Optimizing weights for 20160504_0930_ME4_3+_CVscan_NCE35_Turbo


unidentified_index:  4
weights:  [2. 2. 2. 2. 0. 0. 2. 2. 2. 2. 0. 0. 0. 2. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2.
 2. 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 2. 2. 0.]
| [0m396      [0m | [0m4.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m |
./peptide output/Test/201

2023-08-16 11:22:41,265 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 11:22:41,266 - root - INFO - Loss: -4.0
2023-08-16 11:22:41,267 - root - INFO - The running time of 20160504_0930_ME4_3+_CVscan_NCE35_Turbo: 1318.9596207085997


unidentified_index:  4
weights:  [0. 2. 0. 0. 0. 2. 0. 2. 2. 2. 0. 2. 0. 2. 0. 2. 0. 0. 0. 0. 2. 2. 0. 0.
 0. 0. 0. 0. 0. 2. 0. 0. 0. 2. 0. 0. 2. 2. 2.]
| [0m397      [0m | [0m4.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m |


2023-08-16 11:22:41,510 - root - INFO - Optimizing weights for 20160511_2003_ME17_2+_CVscan_Turbo


./peptide output/Test/20160511_2003_ME17_2+_CVscan_Turbo/10000 scans
sig calculated for feature 50
sig calculated for feature 100
sig calculated for feature 150
sig calculated for feature 200
sig calculated for feature 250
sig calculated for feature 300
unidentified_index:  2
weights:  [2. 0. 0. 0. 2. 0. 2. 0. 2. 2. 0. 2. 0. 0. 2. 2. 2. 2. 0. 2. 2. 2. 2. 2.
 0. 0. 2. 0. 2. 2. 0. 2. 2. 0. 2. 0. 2. 2. 2.]
|   iter    |  target   |    w_0    |    w_1    |   w_10    |   w_11    |   w_12    |   w_13    |   w_14    |   w_15    |   w_16    |   w_17    |   w_18    |   w_19    |    w_2    |   w_20    |   w_21    |   w_22    |   w_23    |   w_24    |   w_25    |   w_26    |   w_27    |   w_28    |   w_29    |    w_3    |   w_30    |   w_31    |   w_32    |   w_33    |   w_34    |   w_35    |   w_36    |   w_37    |   w_38    |    w_4    |    w_5    |    w_6    |    w_7    |    w_8    |    w_9    |
---------------------------------------------------------------------------------------------------

2023-08-16 11:41:47,514 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 11:41:47,515 - root - INFO - Loss: -4.0
2023-08-16 11:41:47,516 - root - INFO - The running time of 20160511_2003_ME17_2+_CVscan_Turbo: 1146.0049841217697
2023-08-16 11:41:47,627 - root - INFO - Optimizing weights for 20160602_1249_ME8_3+_CVscan_NCE35_Turbo


unidentified_index:  4
weights:  [2. 2. 0. 0. 0. 2. 2. 2. 2. 2. 0. 0. 0. 2. 2. 2. 2. 2. 2. 0. 2. 2. 0. 2.
 2. 2. 0. 2. 2. 0. 2. 2. 0. 0. 0. 2. 2. 2. 2.]
| [0m398      [0m | [0m4.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m |
./peptide output/Test/201

2023-08-16 13:04:47,918 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 13:04:47,919 - root - INFO - Loss: -4.0
2023-08-16 13:04:47,920 - root - INFO - The running time of 20160622_1448_ME14_2+_1to2500_CVscan_NCE35: 1205.2230964265764
2023-08-16 13:04:48,034 - root - INFO - Optimizing weights for 20160622_1514_ME14_3+_1to2500_CVscan_NCE35


unidentified_index:  4
weights:  [0. 2. 2. 0. 0. 2. 0. 2. 2. 2. 0. 2. 0. 2. 2. 2. 2. 2. 0. 0. 2. 2. 0. 0.
 0. 2. 2. 0. 0. 0. 2. 2. 0. 0. 0. 2. 2. 2. 0.]
| [0m402      [0m | [0m4.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m |
./peptide output/Test/201

2023-08-16 13:26:49,482 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 13:26:49,484 - root - INFO - Loss: -4.0
2023-08-16 13:26:49,484 - root - INFO - The running time of 20160622_1514_ME14_3+_1to2500_CVscan_NCE35: 1321.4497544746846
2023-08-16 13:26:49,571 - root - INFO - Optimizing weights for 20160708_1747_UN14_2+_0,01mM_CVscan_Turbo


unidentified_index:  4
weights:  [0. 2. 2. 0. 0. 2. 0. 2. 2. 2. 0. 0. 0. 2. 2. 2. 2. 0. 2. 0. 2. 2. 0. 2.
 0. 2. 2. 2. 2. 2. 2. 2. 0. 2. 0. 0. 0. 0. 2.]
| [0m403      [0m | [0m4.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m |
./peptide output/Test/201



./peptide output/Test/20160428_2100_ME16_3+_CVscan_NCE35_Turbo/10000 scans
sig calculated for feature 50


2023-08-16 14:03:39,821 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 14:03:39,822 - root - INFO - Loss: -4.0
2023-08-16 14:03:39,823 - root - INFO - The running time of PH8_2+_CVscan_NCE35_Turbo_20160505_1308: 1122.6507335416973
2023-08-16 14:03:39,944 - root - INFO - Optimizing weights for 20160428_2100_ME16_3+_CVscan_NCE35_Turbo


unidentified_index:  4
weights:  [2. 2. 2. 0. 2. 0. 2. 0. 2. 2. 0. 2. 0. 2. 2. 2. 0. 0. 0. 2. 2. 2. 0. 2.
 2. 2. 2. 2. 2. 0. 2. 2. 0. 0. 0. 0. 0. 0. 0.]
| [0m405      [0m | [0m4.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m |
epoch: 294 to 308
./pepti

2023-08-16 14:25:50,710 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 14:25:50,711 - root - INFO - Loss: -4.0
2023-08-16 14:25:50,711 - root - INFO - The running time of 20160428_2100_ME16_3+_CVscan_NCE35_Turbo: 1330.7666233237833
2023-08-16 14:25:50,823 - root - INFO - Optimizing weights for 20160428_2222_ME16_2+_CVScan_NCE35_Turbo


unidentified_index:  4
weights:  [0. 2. 0. 0. 2. 0. 2. 0. 2. 2. 0. 2. 0. 2. 2. 2. 0. 0. 0. 0. 2. 2. 0. 0.
 2. 2. 2. 0. 2. 2. 2. 0. 0. 0. 0. 0. 0. 0. 2.]
| [0m406      [0m | [0m4.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m |
./peptide output/Test/201

2023-08-16 14:45:58,695 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 14:45:58,696 - root - INFO - Loss: -4.0
2023-08-16 14:45:58,696 - root - INFO - The running time of 20160428_2222_ME16_2+_CVScan_NCE35_Turbo: 1207.872574975714
2023-08-16 14:45:58,851 - root - INFO - Optimizing weights for 20160504_0930_ME4_3+_CVscan_NCE35_Turbo


unidentified_index:  4
weights:  [0. 2. 0. 0. 0. 2. 0. 0. 2. 2. 0. 2. 0. 2. 2. 0. 2. 2. 0. 2. 2. 0. 0. 2.
 2. 2. 2. 2. 0. 2. 0. 2. 2. 2. 0. 2. 0. 2. 0.]
| [0m407      [0m | [0m4.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m |
./peptide output/Test/201

2023-08-16 15:08:24,774 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 15:08:24,775 - root - INFO - Loss: -4.0
2023-08-16 15:08:24,775 - root - INFO - The running time of 20160504_0930_ME4_3+_CVscan_NCE35_Turbo: 1345.922848423943
2023-08-16 15:08:24,867 - root - INFO - Optimizing weights for 20160511_2003_ME17_2+_CVscan_Turbo


unidentified_index:  4
weights:  [0. 2. 0. 0. 0. 0. 0. 2. 2. 2. 0. 2. 0. 2. 2. 0. 0. 2. 2. 0. 2. 2. 0. 0.
 0. 0. 0. 2. 2. 0. 0. 0. 0. 2. 0. 0. 2. 2. 0.]
| [0m408      [0m | [0m4.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m |
./peptide output/Test/201

2023-08-16 15:27:55,600 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 15:27:55,601 - root - INFO - Loss: -4.0
2023-08-16 15:27:55,601 - root - INFO - The running time of 20160511_2003_ME17_2+_CVscan_Turbo: 1170.7331245522946
2023-08-16 15:27:55,714 - root - INFO - Optimizing weights for 20160602_1249_ME8_3+_CVscan_NCE35_Turbo


unidentified_index:  4
weights:  [0. 2. 2. 2. 0. 2. 2. 2. 2. 2. 0. 2. 0. 2. 2. 2. 0. 0. 2. 0. 2. 2. 0. 0.
 0. 0. 0. 2. 2. 0. 2. 0. 2. 2. 0. 0. 2. 2. 0.]
| [0m409      [0m | [0m4.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m |
./peptide output/Test/201



./peptide output/Test/20160428_2100_ME16_3+_CVscan_NCE35_Turbo/10000 scans
sig calculated for feature 50


2023-08-16 15:50:37,132 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 15:50:37,133 - root - INFO - Loss: -4.0
2023-08-16 15:50:37,134 - root - INFO - The running time of 20160602_1249_ME8_3+_CVscan_NCE35_Turbo: 1361.4190431889147


unidentified_index:  4
weights:  [0. 2. 0. 2. 2. 0. 2. 0. 2. 2. 0. 2. 0. 2. 0. 0. 0. 2. 0. 0. 2. 0. 0. 2.
 2. 2. 0. 2. 2. 0. 0. 2. 0. 2. 0. 2. 0. 0. 0.]
| [0m410      [0m | [0m4.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m |


2023-08-16 15:50:37,402 - root - INFO - Optimizing weights for 20160603_1005_ME9_2+_CVscan_NCE35_Turbo


./peptide output/Test/20160603_1005_ME9_2+_CVscan_NCE35_Turbo/10000 scans
sig calculated for feature 50
sig calculated for feature 100
sig calculated for feature 150
sig calculated for feature 200
sig calculated for feature 250
sig calculated for feature 300
unidentified_index:  1
weights:  [2. 0. 0. 0. 2. 0. 2. 0. 2. 2. 0. 2. 0. 0. 2. 2. 2. 2. 0. 2. 2. 2. 2. 2.
 0. 0. 2. 0. 2. 2. 0. 2. 2. 0. 2. 0. 2. 2. 2.]
|   iter    |  target   |    w_0    |    w_1    |   w_10    |   w_11    |   w_12    |   w_13    |   w_14    |   w_15    |   w_16    |   w_17    |   w_18    |   w_19    |    w_2    |   w_20    |   w_21    |   w_22    |   w_23    |   w_24    |   w_25    |   w_26    |   w_27    |   w_28    |   w_29    |    w_3    |   w_30    |   w_31    |   w_32    |   w_33    |   w_34    |   w_35    |   w_36    |   w_37    |   w_38    |    w_4    |    w_5    |    w_6    |    w_7    |    w_8    |    w_9    |
----------------------------------------------------------------------------------------------

2023-08-16 16:10:24,671 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 16:10:24,672 - root - INFO - Loss: -4.0
2023-08-16 16:10:24,673 - root - INFO - The running time of 20160603_1005_ME9_2+_CVscan_NCE35_Turbo: 1187.2695826999843


unidentified_index:  4
weights:  [0. 2. 2. 0. 0. 2. 0. 2. 2. 2. 0. 0. 0. 2. 2. 0. 0. 2. 0. 2. 2. 0. 2. 0.
 0. 2. 2. 0. 2. 0. 0. 0. 0. 2. 0. 0. 0. 0. 0.]
| [0m411      [0m | [0m4.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m |


2023-08-16 16:10:24,949 - root - INFO - Optimizing weights for 20160603_1040_ME9_3+_CVscan_NCE35_Turbo


./peptide output/Test/20160603_1040_ME9_3+_CVscan_NCE35_Turbo/10000 scans
sig calculated for feature 50
sig calculated for feature 100
sig calculated for feature 150
sig calculated for feature 200
sig calculated for feature 250
sig calculated for feature 300
unidentified_index:  4
weights:  [2. 0. 0. 0. 2. 0. 2. 0. 2. 2. 0. 2. 0. 0. 2. 2. 2. 2. 0. 2. 2. 2. 2. 2.
 0. 0. 2. 0. 2. 2. 0. 2. 2. 0. 2. 0. 2. 2. 2.]
|   iter    |  target   |    w_0    |    w_1    |   w_10    |   w_11    |   w_12    |   w_13    |   w_14    |   w_15    |   w_16    |   w_17    |   w_18    |   w_19    |    w_2    |   w_20    |   w_21    |   w_22    |   w_23    |   w_24    |   w_25    |   w_26    |   w_27    |   w_28    |   w_29    |    w_3    |   w_30    |   w_31    |   w_32    |   w_33    |   w_34    |   w_35    |   w_36    |   w_37    |   w_38    |    w_4    |    w_5    |    w_6    |    w_7    |    w_8    |    w_9    |
----------------------------------------------------------------------------------------------

2023-08-16 16:30:57,934 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 16:30:57,935 - root - INFO - Loss: -4.0
2023-08-16 16:30:57,936 - root - INFO - The running time of 20160603_1040_ME9_3+_CVscan_NCE35_Turbo: 1232.9856652207673
2023-08-16 16:30:58,050 - root - INFO - Optimizing weights for 20160622_1448_ME14_2+_1to2500_CVscan_NCE35


unidentified_index:  4
weights:  [0. 2. 2. 2. 0. 0. 2. 2. 2. 2. 0. 2. 0. 2. 0. 0. 0. 0. 0. 0. 2. 2. 0. 0.
 2. 2. 0. 2. 0. 2. 2. 0. 2. 0. 2. 2. 2. 2. 0.]
| [0m412      [0m | [0m4.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m |
./peptide output/Test/201

2023-08-16 16:50:41,498 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 16:50:41,499 - root - INFO - Loss: -4.0
2023-08-16 16:50:41,499 - root - INFO - The running time of 20160622_1448_ME14_2+_1to2500_CVscan_NCE35: 1183.4486548956484


unidentified_index:  4
weights:  [2. 2. 2. 0. 0. 0. 0. 2. 2. 2. 0. 0. 0. 2. 2. 0. 2. 0. 2. 2. 2. 0. 0. 2.
 2. 2. 2. 2. 2. 0. 0. 0. 0. 2. 0. 2. 2. 0. 0.]
| [0m413      [0m | [0m4.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m |


2023-08-16 16:50:41,614 - root - INFO - Optimizing weights for 20160622_1514_ME14_3+_1to2500_CVscan_NCE35


./peptide output/Test/20160622_1514_ME14_3+_1to2500_CVscan_NCE35/10000 scans
sig calculated for feature 50
sig calculated for feature 100
sig calculated for feature 150
sig calculated for feature 200
sig calculated for feature 250
sig calculated for feature 300
unidentified_index:  6
weights:  [2. 0. 0. 0. 2. 0. 2. 0. 2. 2. 0. 2. 0. 0. 2. 2. 2. 2. 0. 2. 2. 2. 2. 2.
 0. 0. 2. 0. 2. 2. 0. 2. 2. 0. 2. 0. 2. 2. 2.]
|   iter    |  target   |    w_0    |    w_1    |   w_10    |   w_11    |   w_12    |   w_13    |   w_14    |   w_15    |   w_16    |   w_17    |   w_18    |   w_19    |    w_2    |   w_20    |   w_21    |   w_22    |   w_23    |   w_24    |   w_25    |   w_26    |   w_27    |   w_28    |   w_29    |    w_3    |   w_30    |   w_31    |   w_32    |   w_33    |   w_34    |   w_35    |   w_36    |   w_37    |   w_38    |    w_4    |    w_5    |    w_6    |    w_7    |    w_8    |    w_9    |
-------------------------------------------------------------------------------------------

2023-08-16 17:12:54,947 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 17:12:54,948 - root - INFO - Loss: -4.0
2023-08-16 17:12:54,949 - root - INFO - The running time of 20160622_1514_ME14_3+_1to2500_CVscan_NCE35: 1333.3337427657098
2023-08-16 17:12:55,036 - root - INFO - Optimizing weights for 20160708_1747_UN14_2+_0,01mM_CVscan_Turbo


unidentified_index:  4
weights:  [2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 0. 2. 0. 2. 0. 2. 0. 2. 0. 0. 2. 2. 2. 2.
 2. 2. 0. 2. 0. 2. 0. 0. 2. 0. 0. 2. 0. 0. 2.]
| [0m414      [0m | [0m4.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m |
./peptide output/Test/201

2023-08-16 17:32:35,714 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 17:32:35,716 - root - INFO - Loss: -4.0
2023-08-16 17:32:35,716 - root - INFO - The running time of 20160708_1747_UN14_2+_0,01mM_CVscan_Turbo: 1180.67930621095
2023-08-16 17:32:35,842 - root - INFO - Optimizing weights for PH8_2+_CVscan_NCE35_Turbo_20160505_1308


unidentified_index:  4
weights:  [0. 2. 2. 2. 0. 0. 0. 0. 2. 2. 0. 2. 0. 2. 2. 2. 0. 0. 0. 0. 2. 2. 0. 2.
 0. 2. 2. 0. 2. 0. 0. 0. 0. 0. 0. 0. 2. 0. 0.]
| [0m415      [0m | [0m4.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m |
./peptide output/Test/PH8

2023-08-16 17:51:30,306 - root - INFO - Weights: [2. 2. 0. 0. 2. 0. 0. 2. 2. 2. 0. 2. 0. 2. 0. 0. 2. 2. 0. 0. 2. 0. 0. 0.
 2. 2. 2. 2. 0. 0. 2. 0. 0. 0. 2. 0. 2. 0. 2.]
2023-08-16 17:51:30,307 - root - INFO - Loss: -4.0
2023-08-16 17:51:30,308 - root - INFO - The running time of PH8_2+_CVscan_NCE35_Turbo_20160505_1308: 1134.464910723269
2023-08-16 17:51:30,428 - root - INFO - Optimizing weights for 20160428_2100_ME16_3+_CVscan_NCE35_Turbo


unidentified_index:  4
weights:  [0. 2. 2. 0. 0. 2. 0. 0. 2. 2. 0. 0. 0. 2. 2. 2. 2. 2. 0. 0. 2. 2. 2. 2.
 2. 2. 0. 2. 2. 0. 0. 2. 2. 0. 0. 0. 0. 0. 0.]
| [0m416      [0m | [0m4.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m2.0      [0m | [0m2.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m | [0m0.0      [0m |
epoch: 308 to 322
./pepti

In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

def convert_to_1d_list(lst_2d):
    """Flatten one level of nesting.

    Parameters
    ----------
    lst_2d : iterable of iterables
        Outer container whose elements are themselves iterable.

    Returns
    -------
    list
        A new list holding every inner element, in the order encountered.
    """
    flattened = []
    for sublist in lst_2d:
        # extend() walks the inner iterable exactly like a nested for-loop.
        flattened.extend(sublist)
    return flattened

# Read the data from the file
with open('weights.txt', 'r') as file:
    data = file.readlines()

unidentified_peaks = []
weights = []

# Extract the 'Number of unidentified peaks' data: each matching line carries a
# bracketed, comma-separated list of integers.
for line in data:
    if line.startswith('Number of unidentified peaks:'):
        unidentified_peaks.append([int(x) for x in line.split('[')[1].split(']')[0].strip().split(',')])

# Extract the 'Weights' data with a small state machine:
#   * a 'Weights:' line opens a record (the array may wrap over several lines),
#   * every following line is treated as a continuation of that array,
#   * a line starting with '|' closes the record and stores the flattened row.
# `firstline` remembers the most recent marker line ('Weights:' or '|') so that
# continuation lines are only parsed while a record is open.
# NOTE(review): a trailing 'Weights:' record that is not followed by a '|' line
# is never appended to `weights` — confirm this matches the file layout.
firstline = data[0] if data else ''  # guard against an empty file
weight = []
for line in data:
    if line.startswith('Weights:'):
        # Cut at the closing bracket as well, so an array that fits on a single
        # line ('Weights: [2. 0.]') parses instead of failing on float('0.]').
        weight.append([float(x) for x in line.split('[')[1].split(']')[0].strip().split()])
        firstline = line
    elif line.startswith('|'):
        firstline = line
        weight = convert_to_1d_list(weight)
        weights.append(weight)
        weight = []
    else:
        if firstline.startswith('Weights:'):
            if ']' in line:
                line = line.split(']')[0]  # Remove the closing square bracket if present
            weight.append([float(x) for x in line.strip().split()])

In [73]:
import numpy as np
from sklearn.model_selection import cross_val_score, KFold
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Convert the parsed lists to numpy arrays.
# NOTE(review): presumably one row per optimisation step and one column per
# data set / weight — confirm against the layout of weights.txt.
unidentified_peaks = np.array(unidentified_peaks)
weights = np.array(weights)

# Create polynomial features (all terms up to degree 4) from the peak counts
poly_features = PolynomialFeatures(degree=4)
X_poly = poly_features.fit_transform(unidentified_peaks)

# Evaluate the model using cross-validation.
# The KFold splitter has a fixed random_state, so the three scoring runs below
# are evaluated on identical folds.
model = ElasticNet(alpha=5, l1_ratio=0.9, max_iter=10000)  # Set appropriate alpha and l1_ratio values, increase max_iter
kf = KFold(n_splits=10, random_state=100, shuffle=True)  # Using 10-fold cross-validation
# scikit-learn reports error metrics negated ("higher is better"), hence the sign flip.
mse_scores = -cross_val_score(model, X_poly, weights, cv=kf, scoring='neg_mean_squared_error')
mae_scores = -cross_val_score(model, X_poly, weights, cv=kf, scoring='neg_mean_absolute_error')
r2_scores = cross_val_score(model, X_poly, weights, cv=kf, scoring='r2')

# Print evaluation metrics (means over the folds)
print("Mean Squared Error (MSE):", mse_scores.mean())
print("Mean Absolute Error (MAE):", mae_scores.mean())
print("R-squared (R2):", r2_scores.mean())

# Train the model on the full dataset
model.fit(X_poly, weights)

# Infer the weights predicted for zero unidentified peaks in every column.
# The zero vector is sized from the training data rather than hard-coded,
# because poly_features.transform only accepts the column count seen in fit.
zero_peaks = np.zeros((1, unidentified_peaks.shape[1]), dtype=int)
new_unidentified_peaks = poly_features.transform(zero_peaks)
new_weights = model.predict(new_unidentified_peaks)

print("Inferred weights:", new_weights)


Mean Squared Error (MSE): 0.5075366232265835
Mean Absolute Error (MAE): 0.5716404360082807
R-squared (R2): -1.314628293764832
Inferred weights: [[0.78136551 0.55991325 0.38953968 1.17144206 1.7280392  1.41160309
  0.7367098  0.50568374 1.2368116  0.50839384 0.08375496 1.09107162
  0.81373969 0.94177423 1.14768979 1.2541851  1.49812077 1.09032953
  0.93572582 0.95428169 0.77798594 0.62427191 0.05172322 1.00278769
  1.15315906 0.06058991 0.56622675 0.70896753 0.32560625 1.20539019
  0.55439145 0.95871153 0.86254659 0.59045698 0.53019357 0.56329984
  0.97629813 1.2658647  0.89319855 1.27864025 0.3694841  2.05249244
  0.55211351 1.30889555 1.26396847 0.78141277 1.3252244  1.61221797
  0.63785475 0.74358429 0.88887083 1.70667033 1.69263916 0.70931013
  1.00956138 0.980615   0.73625802 1.05577139 1.3413735  0.71028473
  0.93015966 1.18033695 0.48946416 0.75422201 0.69985193 0.52413704
  0.05455846 0.87979785 1.09858879 0.53437486 1.46856541 1.61117002
  1.03077104 1.33974688 0.74895944 0.551

In [6]:
def pcov_map(array1, array2, pCov_params, norm_factor):
    """Compute the partial-covariance and covariance maps of two scan arrays.

    Row 0 of ``array1`` and ``array2`` is skipped (presumably a header row such
    as the m/z axis — TODO confirm); the remaining rows are the scans, one row
    per scan and one column per feature.

    Parameters
    ----------
    array1, array2 : np.ndarray
        2-D arrays whose rows ``1:`` hold the scans. They are NOT modified:
        the normalisation is applied to copies, so repeated calls, or passing
        the same array for both arguments, no longer compound the scaling.
    pCov_params : np.ndarray
        Partial-covariance parameter, one value per scan. Its first dimension
        defines ``num_scans`` and must match the number of scan rows.
    norm_factor : scalar or array broadcastable to the scan rows
        Multiplicative normalisation applied to the scans before any statistic
        is computed.

    Returns
    -------
    (np.ndarray, np.ndarray)
        ``(pcov_matrix.T, cov_matrix.T)``; element ``[j, i]`` relates feature
        ``j`` of ``array1`` to feature ``i`` of ``array2``.

    Notes
    -----
    The partial covariance is rescaled by ``(n - 1) / (n - 2)``, so at least
    three scans are required.
    """
    # Scaled copies: leaves the caller's arrays untouched and avoids scaling
    # twice when array1 and array2 are the same object.
    x_scans = array1[1:] * norm_factor
    y_scans = array2[1:] * norm_factor

    num_scans = pCov_params.shape[0]

    # Quantities shared by the covariance and partial-covariance maps.
    yx_sum = y_scans.T @ x_scans
    sx_vec = x_scans.sum(axis=0)
    sy_vec = y_scans.sum(axis=0)
    sysx = np.outer(sy_vec, sx_vec)

    # Sample covariance split into <YX> and <Y><X> terms.
    av_yx = yx_sum / (num_scans - 1)
    av_y_av_x = sysx / (num_scans * (num_scans - 1))

    # Sample variance of the partial-covariance parameter.
    param_sum = pCov_params.sum(axis=0)
    si2 = (pCov_params ** 2).sum(axis=0)
    s2i = param_sum ** 2
    var = (si2 - s2i / num_scans) / (num_scans - 1)

    # Covariance of every feature with the parameter.
    si_sx = param_sum * sx_vec
    cov_xi_vec = (pCov_params.T @ x_scans) / (num_scans - 1) - si_sx / (num_scans * (num_scans - 1))

    si_sy = param_sum * sy_vec
    cov_yi_vec = (pCov_params.T @ y_scans) / (num_scans - 1) - si_sy / (num_scans * (num_scans - 1))

    # pcov(Y, X; I) = cov(Y, X) - cov(Y, I) cov(I, X) / var(I)
    pcov_matrix = av_yx - av_y_av_x - cov_yi_vec.reshape(-1, 1) @ cov_xi_vec.reshape(1, -1) / var
    pcov_matrix *= (num_scans - 1) / (num_scans - 2)

    cov_matrix = yx_sum - sysx / num_scans
    cov_matrix /= num_scans - 1

    return pcov_matrix.T, cov_matrix.T

