In [1]:
from datetime import datetime
start = datetime.now()

import logging
logging.basicConfig(filename='torch_time.log', filemode='w', format='%(asctime)s - %(message)s', level=logging.INFO)

import shutil

In [2]:

import numpy as np
import argparse
import sys

parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='?', type=np.array, default=None, help='weights for weighted covariance map (default: None)')
parser.add_argument('--epoch', nargs='?', type=int, default=None, help='epoch number (default: None)')
parser.add_argument('--dirname', nargs='?', type=str, default='Test', help='directory name')
parser.add_argument('--name', nargs='?', type=str, default='6727_2d-PC-MS_1pmol-ul_1AGC_0-7quadiso_HCD', help='file name')
parser.add_argument('--run_all', nargs='?', type=bool, default='False', help='whether to run all files in the directory or not (default: False)')
if sys.argv[1] == '-f':
    sys.argv=[sys.argv[0:-2]]
args = parser.parse_args()
print(args.name)



6727_2d-PC-MS_1pmol-ul_1AGC_0-7quadiso_HCD


In [3]:
import os
import matplotlib.pyplot as plt
from pC2DMSUtils import maxIndices#, varII, covXI, cutAC, scaleToPower
import pC2DMS
import pC2DMSUtils
import time
from pathlib import Path

def txt2cache(textFile, path, numscanInterval=None):
    saveName=path+'/save'
    numScans='all'

    print('path:', path)
    print(numScans, 'scans')
    print('save in', saveName)

    start = time.time()
    pC2DMSUtils.readTextFile(textFile, path, numscanInterval=numscanInterval)
    readTime = time.time()


def preprocess(prefix, data_path='../IC/raw data/Test/', cache_path='./peptide output/Test/', numscanInterval=None):
    data_filename = data_path + prefix + '.txt'
    cache_dir = cache_path + prefix + '/10000 scans'
    if not os.path.exists(cache_path + prefix):
        os.makedirs(cache_path + prefix)
    #     if Path(cache_dir).is_dir():
    #         print('Cache directory exists. Preprocessing skipped.')
    #     else:
    txt2cache(data_filename, cache_dir, numscanInterval=numscanInterval)
    return cache_dir
    
    
def pcovmap(path, mode, weights=None):
    scan1=pC2DMS.Scan(path)
    scanTime = time.time()
    if mode == 'w':
        map1=pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans='all')
    elif mode == 'tic':
        map1=pC2DMS.PCovMap(scan1, scan1.tic(), numScans='all')
    else:
        map1=pC2DMS.CovMap(scan1, numScans='all')
    mapTime = time.time()
    print('map time', mapTime-scanTime)
    del scan1, mapTime, scanTime
    return map1


def run_covmap(prefix, mode = 'tic', data_path='../IC/raw data/Test/', cache_path='./peptide output/Test/',
               numscanInterval=None, weights=None, epoch=None):
    cache_dir = cache_path + prefix + '/10000 scans'
    if numscanInterval is not None:
        for numscan in np.arange(numscanInterval, 10000, numscanInterval):
            if not os.path.exists(cache_path + prefix + '/' + str(numscan) + ' scans/' + 'array.npy'):
                cache_dir = preprocess(prefix, data_path=data_path, cache_path=cache_path, numscanInterval=numscanInterval)
                break
    else:
        if not os.path.exists(cache_path + prefix + '/10000 scans'):
            cache_dir = preprocess(prefix, data_path=data_path, cache_path=cache_path)
    for dirname in os.listdir(os.path.abspath(os.path.join(cache_dir, os.pardir))):
        if numscanInterval is not None:
            cache_dir = os.path.join(os.path.abspath(os.path.join(cache_dir, os.pardir)), dirname)
        if not os.path.exists(cache_dir + '/array.npy'):
            continue
        featfile = cache_dir+'/'+mode+'_topfeat.npy'
        reportfile = cache_dir+'/'+mode+'_topfeat.csv'
        if epoch is not None:
            featfile = cache_dir+'/'+mode+'_'+epoch+'_topfeat.npy'
            reportfile = cache_dir+'/'+mode+'_'+epoch+ '_topfeat.csv'
        print(cache_dir)
        cmap = pcovmap(cache_dir, mode, weights=weights)
        mapfile = cache_dir+'/'+mode+'_map.npy'
        np.save(mapfile, cmap.array)
        if mode == 'w':
            if Path(featfile).is_file():
                os.remove(featfile)
            topfeat = cmap.analyse(3000)
            np.save(featfile, topfeat)
        elif Path(featfile).is_file():
            print('Feature file exists. Features loaded. Analysis skipped.')
            topfeat = np.load(featfile)
        else:
            if mode == 'tic':
                topfeat = cmap.analyse(3000)
            else:
                topfeat = cmap.analyse(1000)
            np.save(featfile, topfeat)

        topfeat_sorted = topfeat[np.flip(topfeat[:, 3].argsort())]
    #     new_template = np.copy(cmap.array)
    #     new_template.fill(0)
        np.savetxt(reportfile, topfeat_sorted, fmt = '%.2f', delimiter=',')
        if numscanInterval is None:
            break
    return cmap

In [4]:
if args.run_all:
    # # # datafile_list = ['20200206_1455,20_ME16_3+_NCE15_AGC100_cvScan2000_turbo_1,7V_2uL',
    # # #                  '20160503_1127_PH4_3+_CVscan_NCE35_Turbo',
    # # #                  '20160603_1040_ME9_3+_CVscan_NCE35_Turbo',
    # # #                  '20160622_1514_ME14_3+_1to2500_CVscan_NCE35'
    ####                    '7255_2D-PC-MS_0-5pmol-ul_0-1AGC_0-7quadiso',
    # # #                 ]

    # datafile_list = ['7302_2D-PC-MS_2pmol-ul_1AGC_0-7quadiso',
    #                  '7302_2D-PC-MS_1pmol-ul_1AGC_0-7quadiso',
    #                  '7302_2D-PC-MS_0-5pmol-ul_0-1AGC_0-7quadiso',
    #                  '7302_2D-PC-MS_0-1pmol-ul_0-1AGC_0-7quadiso',
    #                  '7255_2D-PC-MS_5pmol-ul_1AGC_0-7quadiso',
    #                  '7255_2D-PC-MS_1pmol-ul_0-1AGC_0-7quadiso']

    datafile_dict = {}

    for dirname in os.listdir('../IC/IC/raw data/'):
        datafile_dict[dirname] = []
        for filename in os.listdir('../IC/IC/raw data/' + str(dirname)):
            datafile_dict[dirname].append(os.path.splitext(filename)[0])
        # 6727_2d-PC-MS_1pmol-ul_1AGC_0-7quadiso_HCD']

In [5]:

def run(dirname, datafile_list, numscanInterval=None, mode=['tic'], weights=None, epoch=None):
    try:
        for prefix in datafile_list:
            sub_start = datetime.now()
            for mode in mode:
                cmap = run_covmap(prefix, mode, data_path=os.path.join('../IC/IC/raw data', str(dirname), ''),
                                  cache_path=os.path.join('./peptide output', str(dirname)+'/'), numscanInterval=numscanInterval,
                                  weights=weights, epoch=epoch)
            sub_stop = datetime.now()
            logging.info(f"The running time of {prefix}: {sub_stop - sub_start}")
            del prefix, sub_start, sub_stop, mode, cmap, dirname, datafile_list, numscanInterval, weights, epoch
    except FileExistsError:
        logging.info('File exists. Running skipped.')

In [6]:
if args.run_all:
    print(datafile_dict.keys())

dict_keys(['CID', '.ipynb_checkpoints', 'Test', 'HCD'])


In [7]:

numscanInterval = 1000
logging.info(f"Saving files different from {numscanInterval} scans to maximum scans")

2023-08-24 01:46:21,070 - root - INFO - Saving files different from 1000 scans to maximum scans


In [8]:
name = list(datafile_dict.values())[2][1]

In [9]:
run(args.dirname, [args.name], numscanInterval=numscanInterval, weights=args.weights)

/rds/general/user/ww1922/home/src/peptide output/Test/6727_2d-PC-MS_1pmol-ul_1AGC_0-7quadiso_HCD/3000 scans



KeyboardInterrupt



In [None]:
if args.run_all:
    run(list(datafile_dict)[0], [list(datafile_dict.values())[0][0]], numscanInterval=numscanInterval, weights=args.weights)

In [None]:
if args.run_all:
    run(list(datafile_dict)[0], [list(datafile_dict.values())[0][1]], numscanInterval=numscanInterval, weights=args.weights, epoch=args.epoch)

In [None]:
if args.run_all:
    run(list(datafile_dict)[0], [list(datafile_dict.values())[0][2]], numscanInterval=numscanInterval, weights=args.weights, epoch=args.epoch)

In [None]:
if args.run_all:
    run(list(datafile_dict)[1], [list(datafile_dict.values())[1][0]], numscanInterval=numscanInterval, weights=args.weights, epoch=args.epoch)

In [None]:
if args.run_all:
    run(list(datafile_dict)[1], [list(datafile_dict.values())[1][1]], numscanInterval=numscanInterval, weights=args.weights, epoch=args.epoch)

In [None]:
if args.run_all:
    run(list(datafile_dict)[1], [list(datafile_dict.values())[1][2]], numscanInterval=numscanInterval, weights=args.weights, epoch=args.epoch)

In [None]:
if args.run_all:
    run(list(datafile_dict)[1], [list(datafile_dict.values())[1][3]], numscanInterval=numscanInterval, weights=args.weights, epoch=args.epoch)

In [None]:
if args.run_all:
    run(list(datafile_dict)[1], [list(datafile_dict.values())[1][4]], numscanInterval=numscanInterval, weights=args.weights, epoch=args.epoch)

In [11]:
if args.run_all:
    run(list(datafile_dict)[1], [list(datafile_dict.values())[1][5]], numscanInterval=numscanInterval, weights=args.weights, epoch=args.epoch)

IndexError: list index out of range

In [10]:
if args.run_all:
    run(list(datafile_dict)[2], [list(datafile_dict.values())[2][0]], numscanInterval=1000, weights=args.weights, epoch=args.epoch)

path: ./peptide output/Test/7255_2d-PC-MS_5pmol-ul_Energystepping_1AGC_0-7ltqiso_35CID/10000 scans
all scans
save in ./peptide output/Test/7255_2d-PC-MS_5pmol-ul_Energystepping_1AGC_0-7ltqiso_35CID/10000 scans/save
Reading file ../IC/IC/raw data/Test/7255_2d-PC-MS_5pmol-ul_Energystepping_1AGC_0-7ltqiso_35CID.txt to ./peptide output/Test/7255_2d-PC-MS_5pmol-ul_Energystepping_1AGC_0-7ltqiso_35CID/10000 scans
Reading scan number 100
Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/rds/general/user/ww1922/home/anaconda3/envs/test1/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3505, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/tmp/pbs.7977053.pbs/ipykernel_1810035/2107556360.py", line 2, in <module>
    run(list(datafile_dict)[2], [list(datafile_dict.values())[2][0]], numscanInterval=1000, weights=args.weights, epoch=args.epoch)
  File "/var/tmp/pbs.7977053.pbs/ipykernel_1810035/1911021906.py", line 6, in run
    cmap = run_covmap(prefix, mode, data_path=os.path.join('../IC/IC/raw data', str(dirname), ''),
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/var/tmp/pbs.7977053.pbs/ipykernel_1810035/3292070307.py", line 55, in run_covmap
    cache_dir = preprocess(prefix, data_path=data_path, cache_path=cache_path, numscanInterval=numscanInterval)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In [8]:
if args.run_all:
    run(list(datafile_dict)[2], [list(datafile_dict.values())[2][1]], numscanInterval=1000, weights=args.weights, epoch=args.epoch)

/rds/general/user/ww1922/home/src/peptide output/Test/20160603_1005_ME9_2+_CVscan_NCE35_Turbo/3000 scans
map time 0.4995143413543701
Feature file exists. Features loaded. Analysis skipped.
/rds/general/user/ww1922/home/src/peptide output/Test/20160603_1005_ME9_2+_CVscan_NCE35_Turbo/6000 scans
map time 0.56827712059021
Feature file exists. Features loaded. Analysis skipped.
/rds/general/user/ww1922/home/src/peptide output/Test/20160603_1005_ME9_2+_CVscan_NCE35_Turbo/9000 scans
map time 0.804645299911499
Feature file exists. Features loaded. Analysis skipped.
/rds/general/user/ww1922/home/src/peptide output/Test/20160603_1005_ME9_2+_CVscan_NCE35_Turbo/2000 scans
map time 0.5336406230926514
Feature file exists. Features loaded. Analysis skipped.
/rds/general/user/ww1922/home/src/peptide output/Test/20160603_1005_ME9_2+_CVscan_NCE35_Turbo/10000 scans
map time 0.7258303165435791
Feature file exists. Features loaded. Analysis skipped.
/rds/general/user/ww1922/home/src/peptide output/Test/201

In [None]:
import dill
import pC2DMS
weights = np.array()
with open('optimizer_tpe_loss_func3.dill', "rb") as file:
    optimizer = dill.load(file)
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + list(datafile_dict.values())[2][1] + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc([optimizer.best_params[f'w_{i}'] for i in range(len(weights))]), numScans='all')
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + list(datafile_dict.values())[2][1] + '/') + str(10000)+ ' scans/w_top_feat_tpe(loss_func3,bins:100).npy', top_feat)

In [8]:
import dill
import pC2DMS
weights = np.array([1.10342122, 1.74157288, 0.95289493, 0.8134206, 0.64139062, 0.56111331
, 0.86290635, 0.32493003, 0.17430608, 0.2823324, 0.14877877, 1.03187679
, 1.30729305, 1.52126059, 1.19581792, 1.36287387, 1.11043203, 1.2143669
, 1.45443463, 0.69125176, 0.82259259, 1.68136914, 1.17540044, 1.35825569
, 1.03686637, 1.31171829, 1.92378558, 1.31088039, 1.40793891, 0.11603733
, 1.85584202, 1.40675701, 1.32256018, 0.66425642, 1.09700642, 1.47240462
, 1.37059904, 0.35769377, 1.2734134])
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + list(datafile_dict.values())[2][1] + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans='all')
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + list(datafile_dict.values())[2][1] + '/') + str(10000)+ ' scans/w_top_feat_tpe(loss_func2,bins:100)_new.npy', top_feat)

found 100 good features
found 200 good features
found 300 good features
found 400 good features
found 500 good features
found 600 good features
found 700 good features
found 800 good features
found 900 good features
found 1000 good features
found 1100 good features
found 1200 good features
found 1300 good features
found 1400 good features
found 1500 good features
found 1600 good features
found 1700 good features
found 1800 good features
found 1900 good features
found 2000 good features
found 2100 good features
found 2200 good features
found 2300 good features
found 2400 good features
found 2500 good features
found 2600 good features
found 2700 good features
found 2800 good features
found 2900 good features
found 3000 good features
sig calculated for feature 50
sig calculated for feature 100
sig calculated for feature 150
sig calculated for feature 200
sig calculated for feature 250
sig calculated for feature 300
sig calculated for feature 350
sig calculated for feature 400
sig calculat

In [9]:
print(list(datafile_dict.values())[2][1])

20160603_1005_ME9_2+_CVscan_NCE35_Turbo


In [8]:
import dill
import pC2DMS
weights = np.array([1.77742493, 0.26736808, 1.79356456, 0.60505574, 0.68142098, 0.55045802
, 0.41505678, 0.23192339, 0.74895327, 1.05238373, 0.46418789, 0.92438629
, 1.76361323, 1.11775136, 1.21711266, 1.95209787, 0.04707956, 0.78448469
, 0.17315706, 1.0403938, 0.32855712, 1.61574099, 0.19461299, 0.5621586
, 0.5571355, 1.09908919, 0.70036215, 1.75407656, 1.01922498, 0.46050934
, 0.25243517, 0.33699795, 0.48094845, 1.39329184, 0.57847116, 1.91095005
, 1.09944832, 0.97740241, 1.67971844])
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans='all')
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_tpe(loss_func1,bins:100)_new.npy', top_feat)

found 100 good features
found 200 good features
found 300 good features
found 400 good features
found 500 good features
found 600 good features
found 700 good features
found 800 good features
found 900 good features
found 1000 good features
found 1100 good features
found 1200 good features
found 1300 good features
found 1400 good features
found 1500 good features
found 1600 good features
found 1700 good features
found 1800 good features
found 1900 good features
found 2000 good features
found 2100 good features
found 2200 good features
found 2300 good features
found 2400 good features
found 2500 good features
found 2600 good features
found 2700 good features
found 2800 good features
found 2900 good features
found 3000 good features
sig calculated for feature 50
sig calculated for feature 100
sig calculated for feature 150
sig calculated for feature 200
sig calculated for feature 250
sig calculated for feature 300
sig calculated for feature 350
sig calculated for feature 400
sig calculat

In [None]:
import dill
import pC2DMS
weights = np.load('weights_tpe.npy')
with open('optimizer_tpe_loss_func3.dill', "rb") as file:
    optimizer = dill.load(file)
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc([optimizer.best_params[f'w_{i}'] for i in range(len(weights))]), numScans='all')
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_tpe(loss_func3,bins:100).npy', top_feat)

found 100 good features
found 200 good features
found 300 good features
found 400 good features
found 500 good features
found 600 good features
found 700 good features
found 800 good features
found 900 good features
found 1000 good features
found 1100 good features
found 1200 good features
found 1300 good features
found 1400 good features
found 1500 good features
found 1600 good features
found 1700 good features
found 1800 good features
found 1900 good features
found 2000 good features
found 2100 good features
found 2200 good features
found 2300 good features
found 2400 good features
found 2500 good features
found 2600 good features
found 2700 good features
found 2800 good features
found 2900 good features
found 3000 good features
sig calculated for feature 50
sig calculated for feature 100
sig calculated for feature 150
sig calculated for feature 200
sig calculated for feature 250
sig calculated for feature 300
sig calculated for feature 350
sig calculated for feature 400
sig calculat

In [None]:
import dill
import pC2DMS
weights = np.array([1.10852360e+00, 1.03970814e+00, 4.36300691e-01, 1.31365090e+00
, 4.55692794e-01, 8.46024175e-01, 1.11253384e+00, 1.80067251e+00
, 1.21228033e+00, 2.34350823e-01, 1.57904318e-03, 7.55099172e-01
, 3.18901463e-01, 9.36451730e-01, 1.34660578e+00, 1.13076963e+00
, 1.12982963e+00, 1.45391947e+00, 1.00338455e+00, 1.86823253e+00
, 1.17855160e+00, 1.57184266e+00, 1.30403196e+00, 9.19918662e-01
, 2.79588481e-01, 2.75350232e-01, 1.10435645e+00, 1.10348111e+00
, 1.12390881e+00, 7.22830155e-01, 1.36734980e+00, 1.91305993e+00
, 1.95821784e+00, 4.40292674e-01, 1.01821398e+00, 2.66211971e-01
, 1.69587907e+00, 1.02986897e+00, 1.45997074e+00])
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans='all')
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_tpe(loss_func4,bins:100)_new.npy', top_feat)


found 100 good features
found 200 good features
found 300 good features
found 400 good features
found 500 good features
found 600 good features
found 700 good features
found 800 good features
found 900 good features
found 1000 good features
found 1100 good features
found 1200 good features
found 1300 good features
found 1400 good features
found 1500 good features
found 1600 good features
found 1700 good features
found 1800 good features
found 1900 good features
found 2000 good features
found 2100 good features
found 2200 good features
found 2300 good features
found 2400 good features
found 2500 good features
found 2600 good features
found 2700 good features
found 2800 good features
found 2900 good features
found 3000 good features
sig calculated for feature 50
sig calculated for feature 100
sig calculated for feature 150
sig calculated for feature 200
sig calculated for feature 250
sig calculated for feature 300
sig calculated for feature 350
sig calculated for feature 400


In [9]:
import dill
import pC2DMS
weights = [2, 0, 2, 0, 0, 2, 0, 2, 2, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2,
 0, 2, 2, 0, 0, 2, 0, 2, 2, 0, 2, 2, 0, 2, 2]
weights = np.array(weights)
weights[weights==0] = 0.00001
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans='all')
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_bayopt(loss_func_2,bins:100)_new.npy', top_feat)

found 100 good features
found 200 good features
found 300 good features
found 400 good features
found 500 good features
found 600 good features
found 700 good features
found 800 good features
found 900 good features
found 1000 good features
found 1100 good features
found 1200 good features
found 1300 good features
found 1400 good features
found 1500 good features
found 1600 good features
found 1700 good features
found 1800 good features
found 1900 good features
found 2000 good features
found 2100 good features
found 2200 good features
found 2300 good features
found 2400 good features
found 2500 good features
found 2600 good features
found 2700 good features
found 2800 good features
found 2900 good features
found 3000 good features
sig calculated for feature 50
sig calculated for feature 100
sig calculated for feature 150
sig calculated for feature 200
sig calculated for feature 250
sig calculated for feature 300
sig calculated for feature 350
sig calculated for feature 400
sig calculat

In [None]:
import dill
import pC2DMS
weights = [0, 0, 0, 2, 2, 0, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2,
 0, 2, 2, 0, 0, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2]
weights = np.array(weights)
weights[weights==0] = 0.00001
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans='all')
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_bayopt(loss_func1,bins:100)_new.npy', top_feat)

In [10]:
import dill
import pC2DMS
weights = [0.74908024, 1.90142861, 1.66488528, 0.73272369, 1.89777107, 1.93126407
, 1.6167947, 0.60922754, 0.19534423, 1.36846605, 1.46398788, 1.19731697
, 0.31203728, 0.31198904, 0.11616722, 1.73235229, 1.20223002, 1.41614516
, 0.04116899, 1.9398197, 0.42467822, 0.36364993, 0.36680902, 0.60848449
, 1.04951286, 0.86389004, 0.58245828, 1.22370579, 0.27898772, 0.5842893
, 0.91213997, 1.57035192, 0.39934756, 1.02846888, 1.18482914, 0.09290083
, 1.2150897, 0.34104825, 0.13010319]
weights = np.array(weights)
weights[weights==0] = 0.00001
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans='all')
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_bayopt(loss_func3,bins:100)_new.npy', top_feat)


found 100 good features
found 200 good features
found 300 good features
found 400 good features
found 500 good features
found 600 good features
found 700 good features
found 800 good features
found 900 good features
found 1000 good features
found 1100 good features
found 1200 good features
found 1300 good features
found 1400 good features
found 1500 good features
found 1600 good features
found 1700 good features
found 1800 good features
found 1900 good features
found 2000 good features
found 2100 good features
found 2200 good features
found 2300 good features
found 2400 good features
found 2500 good features
found 2600 good features
found 2700 good features
found 2800 good features
found 2900 good features
found 3000 good features
sig calculated for feature 50
sig calculated for feature 100
sig calculated for feature 150
sig calculated for feature 200
sig calculated for feature 250
sig calculated for feature 300
sig calculated for feature 350
sig calculated for feature 400
sig calculat

In [None]:
import dill
import pC2DMS
weights = [0, 0, 2, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 0, 0, 2, 0, 2, 2, 2, 2, 2, 0, 2,
 0, 2, 2, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 2]
weights = np.array(weights)
weights[weights==0] = 0.00001
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans='all')
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_bayopt(loss_func4,bins:100)_new.npy', top_feat)


found 100 good features
found 200 good features
found 300 good features
found 400 good features
found 500 good features
found 600 good features
found 700 good features
found 800 good features
found 900 good features
found 1000 good features
found 1100 good features
found 1200 good features
found 1300 good features
found 1400 good features
found 1500 good features
found 1600 good features
found 1700 good features
found 1800 good features
found 1900 good features
found 2000 good features
found 2100 good features
found 2200 good features
found 2300 good features
found 2400 good features
found 2500 good features
found 2600 good features
found 2700 good features
found 2800 good features
found 2900 good features
found 3000 good features
sig calculated for feature 50
sig calculated for feature 100
sig calculated for feature 150
sig calculated for feature 200
sig calculated for feature 250
sig calculated for feature 300
sig calculated for feature 350
sig calculated for feature 400
sig calculat

In [None]:
import dill
import pC2DMS
weights = [ 0.54078058, 1.91235936, 1.62655215, 0.98051032, 0.0998208, 0.0011511416
, 0.002332195,  1.86147134, 1.38810316, 1.7995949, 0.0019062048, 1.65904835
, 1.44386554, 0.56240998, 0.73713614, 0.01487633, 0.53509304, 0.92510706
, 1.03444032, 0.49783055, 1.63770276, 0.17656574, 0.66594761, 0.43713566
, 1.11780525, 1.38201146, 0.51417041, 1.11179031, 0.94547257, 0.16143622
, 0.90912566, 0.48962911, 0.02293372, 2.16782376, 1.7012007,  1.5526123
, 0.52629643, 0.0004528244, 1.09591412]
weights = np.array(weights)
weights[weights==0] = 0.00001
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans='all')
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_cma(loss_func2,bins:100)_new.npy', top_feat)

In [None]:
import dill
import pC2DMS
weights = [0.87198428, 0.68777021, 1.01795217, 0.7716168, 1.0648289, 0.86397994
, 0.75136499, 1.11068005, 1.26627141, 1.66521011, 0.99750603, 0.89477338
, 1.22469546, 0.71488525, 1.44985682, 1.12219983, 1.22642495, 0.86819807
, 0.95986557, 1.23142302, 0.81214284, 0.67746308, 1.00690961, 0.95587001
, 1.4200546, 0.65757342, 0.78818154, 0.89232315, 0.81914443, 0.76466677
, 0.7983191, 0.82793738, 0.85807479, 0.8224566, 0.78738265, 1.47025078
, 1.19951112, 1.2040894, 0.94259879, 0.80715694, 1.11184515, 0.63483471
, 1.13313472, 1.05243056, 0.87514524, 1.67005402, 0.862337, 0.67911068
, 1.05266955, 0.7748282, 1.25342226, 1.26187574, 0.90096771, 0.88453938
, 1.11914874, 1.08272728, 1.0683505, 0.83556591, 0.76477427, 0.91128541
, 1.26674233, 0.92334216, 0.78757248, 1.19857263, 0.84077996, 0.86083711
, 1.03330723, 0.98194791, 1.21986751, 1.63670788, 1.02694419, 0.70286656
, 0.90663071, 1.33366441, 1.11390551, 0.86586875, 1.58646817, 1.04841778
, 1.12230492, 0.79567624, 1.18097354, 1.07825851, 1.12660943, 1.04212653
, 0.91695336, 1.25433897, 0.94266378, 1.11215407, 0.72935826, 0.76088341
, 0.931085, 1.05899334, 1.8259861, 0.91874732, 0.61429458, 1.03770328
, 0.82013083, 1.09337602, 1.01910799, 0.6806872, 1.18878701, 0.86793008
, 1.18087911, 0.87001041, 0.73416922, 0.8967718, 0.94117019, 1.14080505
, 0.78194158, 0.63973257, 1.27371626, 1.04440407, 1.00156664, 0.84381549
, 0.71709606, 0.99613254, 1.24006755, 0.73606457, 1.33987664, 0.99302416
, 0.96541988, 1.28297668, 0.75528914, 1.26241501, 0.89946114, 1.06764348
, 0.643176, 0.58360713, 1.02761103, 1.07593344, 1.12800901, 1.04378883]
weights = np.array(weights)
weights[weights==0] = 0.00001
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans='all')
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_cma(loss_func1,bins:100)_new.npy', top_feat)

In [None]:
import dill
import pC2DMS
weights = [0.004339374, 0.50798032, 1.64351229, 0.67775987, 0.01657082, 0.75269187
, 1.18337271, 0.93812863, 0.48505375, 0.21487606, 1.09743312, 0.71238726
, 1.55784343, 1.51592355, 0.35794554, 0.12552721, 0.85348355, 0.83238549
, 1.2352135, 1.30421881, 1.18605693, 0.40048151, 1.35825518, 0.31463801
, 1.06113056, 1.40472375, 0.53190937, 1.1505039, 1.05797006, 0.85244303
, 1.00309384, 0.51703579, 0.46421245, 0.90394702, 0.92111887, 1.03359514
, 1.24296482, 0.89522082, 1.1051857]

weights = np.array(weights)
weights[weights==0] = 0.00001
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans='all')
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_cma(loss_func3,bins:100)_new.npy', top_feat)

In [None]:
import dill
import pC2DMS
weights = [0.74160025, 1.17554347, 1.22119417, 1.36680097, 1.08999095, 1.22091697
, 0.99335227, 0.97095117, 0.95386964, 0.76903355, 0.82569467, 0.89859331
, 1.1068456, 0.74264423, 0.79489182, 0.7577499, 0.97953646, 1.08452884
, 0.9237643, 0.91474233, 1.03872657, 1.01189228, 1.12106478, 0.66953374
, 1.36448624, 1.35191527, 0.95438715, 1.05684252, 0.90583876, 0.84336418
, 1.19558659, 1.18986215, 1.16811959, 0.85614931, 0.79294955, 1.28305282
, 0.9318661, 0.89405129, 0.80366216, 1.65484042, 0.87095745, 0.95280496
, 0.84949559, 0.95754092, 0.84557664, 0.98703843, 0.7958573, 1.13044703
, 2.17460658, 1.0500949, 0.71339967, 0.82321625, 0.9881275, 0.88146648
, 1.27887592, 0.78665736, 0.84669591, 1.18625178, 1.07888715, 0.69218346
, 0.80818622, 0.83819566, 0.73283246, 1.01583718, 1.50590281, 0.81426785
, 1.06637176, 0.86762882, 1.11352546, 0.95965498, 0.6892157, 0.79029643
, 1.03726876, 1.0607466, 0.90106773, 0.86688621, 0.90564701, 0.57473586
, 0.89276744, 0.8087, 0.58280645, 1.142229, 0.76628774, 0.88648582
, 0.99050494, 1.07598116, 0.96590563, 0.88465524, 1.00834114, 0.965941
, 0.6895089, 0.65006118, 1.02729331, 1.10461439, 0.55728501, 1.43483551
, 0.96925734, 0.9050727, 1.96655056, 0.94289558, 1.74285389, 0.68880273
, 0.90954084, 1.0566941, 1.11382684, 0.95825931, 1.12884838, 1.04043493
, 0.89725065, 1.00061915, 0.97365071, 1.22249004, 0.95852889, 0.89027841
, 1.07254553, 0.79384854, 1.10607781, 0.82076046, 0.83609501, 1.08345376
, 0.99252562, 1.03735257, 0.95148837, 0.88197611, 1.22194952, 1.1413378
, 0.8436952, 0.78352289, 0.97301558, 1.14048605, 1.09396806, 1.23491957]

weights = np.array(weights)
weights[weights==0] = 0.00001
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(weights), numScans='all')
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_cma(loss_func4,bins:100)_new.npy', top_feat)

In [None]:
import dill
import pC2DMS
weights = np.load('weights_cma.npy')
with open('optimizer_cma_loss_func3.dill', "rb") as file:
    optimizer = dill.load(file)
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + list(datafile_dict.values())[2][13] + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(optimizer.result.xbest), numScans='all')
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + list(datafile_dict.values())[2][13] + '/') + str(10000)+ ' scans/w_top_feat_cma(loss_func3,bins:100).npy', top_feat)

In [16]:
print(list(datafile_dict.values())[2])

['7255_2d-PC-MS_5pmol-ul_Energystepping_1AGC_0-7ltqiso_35CID', '20160603_1005_ME9_2+_CVscan_NCE35_Turbo', '20160629_1557_ME15_2+_CVscan_NCE25', '20160603_1040_ME9_3+_CVscan_NCE35_Turbo', '6727_2d-PC-MS_1pmol-ul_1AGC_0-7quadiso_HCD', '20160318_1809_PH4_2+_CVScan_Turbo', '20160708_1939_UN15_2+_0,01mM_CVscan_Turbo', '20160622_1514_ME14_3+_1to2500_CVscan_NCE35', '20160318_1424_SU5_2+_CVScan_Turbo', '.ipynb_checkpoints', '20160602_1249_ME8_3+_CVscan_NCE35_Turbo', 'SU4_2+_TurboCVscan_NCE20_20160224_1604', '20160629_1625_ME15_2+_CVscan_NCE35', '20160511_2003_ME17_2+_CVscan_Turbo', '20160708_1747_UN14_2+_0,01mM_CVscan_Turbo', '20160629_1738_ME15_3+_CVscan_NCE35', '20160622_1448_ME14_2+_1to2500_CVscan_NCE35', '20160428_2100_ME16_3+_CVscan_NCE35_Turbo', '7255_2d-PC-MS_5pmol-ul_AGCstepping_0.8AGC_0-7ltqiso_CID', '20160503_1649_ME4_2+_CVscan_NCE35_Turbo', '7255_2d-PC-MS_5pmol-ul_Energystepping_1AGC_0-7ltqiso_25HCD', '7255_2d-PC-MS_10pmol-ul_1AGC_0-7quadiso_CID', 'PH8_2+_CVscan_NCE35_Turbo_20160505

./peptide output/Test/20160603_1005_ME9_2+_CVscan_NCE35_Turbo/10000 scans/


In [10]:
import os
import numpy as np
import pC2DMS
import itertools
import gc
import torch

numscan_list = [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000]

for i in numscan_list[::-1]:
    scan_low = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + list(datafile_dict.values())[2][1] + '/') + str(i) + ' scans/')
    if os.path.exists(os.path.join(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + list(datafile_dict.values())[2][1] + '/') + str(i) + ' scans/', 'top_indices_tic.npy')):
        indexlist = np.load(os.path.join(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + list(datafile_dict.values())[2][1] + '/') + str(i) + ' scans/', 'top_indices_tic.npy'))
    else:
        cmap = pC2DMS.PCovMap(scan_low, scan_low.tic(), numScans=i)
        indexlist = cmap.topNfeats(3000)
        topfeat = cmap.sampleFeatsIndex(indexlist)
        topfeat_sorted = topfeat[np.flip(topfeat[:, 3].argsort())]
        np.save(os.path.join(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + list(datafile_dict.values())[2][1] + '/') + str(i) + ' scans/', 'top_indices_tic.npy'), topfeat_sorted)
        np.savetxt(os.path.join(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + list(datafile_dict.values())[2][1] + '/') + str(i) + ' scans/', 'top_indices_tic.csv'), topfeat_sorted, fmt='%.2f', delimiter=',')


In [8]:
if args.run_all:
    run(list(datafile_dict)[2], [list(datafile_dict.values())[2][2]], numscanInterval=1000, weights=args.weights, epoch=args.epoch)

/rds/general/user/ww1922/home/src/peptide output/Test/6727_2d-PC-MS_1pmol-ul_1AGC_0-7quadiso_HCD/3000 scans
map time 2.557283401489258
Feature file exists. Features loaded. Analysis skipped.
/rds/general/user/ww1922/home/src/peptide output/Test/6727_2d-PC-MS_1pmol-ul_1AGC_0-7quadiso_HCD/6000 scans
map time 2.779189109802246
Feature file exists. Features loaded. Analysis skipped.
/rds/general/user/ww1922/home/src/peptide output/Test/6727_2d-PC-MS_1pmol-ul_1AGC_0-7quadiso_HCD/9000 scans
map time 2.657087564468384
Feature file exists. Features loaded. Analysis skipped.
/rds/general/user/ww1922/home/src/peptide output/Test/6727_2d-PC-MS_1pmol-ul_1AGC_0-7quadiso_HCD/2000 scans
map time 2.1841752529144287
Feature file exists. Features loaded. Analysis skipped.
/rds/general/user/ww1922/home/src/peptide output/Test/6727_2d-PC-MS_1pmol-ul_1AGC_0-7quadiso_HCD/10000 scans
map time 2.546226978302002
Feature file exists. Features loaded. Analysis skipped.
/rds/general/user/ww1922/home/src/peptide o

In [None]:
if args.run_all:
    run(list(datafile_dict)[2], [list(datafile_dict.values())[2][3]], numscanInterval=1000, weights=args.weights, epoch=args.epoch)    
    

/rds/general/user/ww1922/home/src/peptide output/Test/20160603_1040_ME9_3+_CVscan_NCE35_Turbo/3000 scans
map time 0.5093557834625244
found 100 good features
found 200 good features
found 300 good features
found 400 good features
found 500 good features
found 600 good features
found 700 good features
found 800 good features
found 900 good features
found 1000 good features
found 1100 good features
found 1200 good features
found 1300 good features
found 1400 good features
found 1500 good features
found 1600 good features
found 1700 good features
found 1800 good features
found 1900 good features
found 2000 good features
found 2100 good features
found 2200 good features
found 2300 good features
found 2400 good features
found 2500 good features
found 2600 good features
found 2700 good features
found 2800 good features
found 2900 good features
found 3000 good features


In [11]:
# To prove the code is forward compatible

datafile_list=['6727_2D-MS-PC_turbo']

'''if Path('./peptide output/CID/' + str(datafile_list[0])).is_dir():
    shutil.rmtree('./peptide output/CID/' + str(datafile_list[0]))
'''
for prefix in datafile_list:
    for mode in ['tic']:
        cmap = run_covmap(prefix, mode, data_path=os.path.join('../IC/IC/raw data', 'CID', ''), cache_path=os.path.join('./peptide output', 'CID'+'/'))

path: ./peptide output/CID/6727_2D-MS-PC_turbo/10000 scans
all scans
save in ./peptide output/CID/6727_2D-MS-PC_turbo/10000 scans/save
Reading file ../IC/IC/raw data/CID/6727_2D-MS-PC_turbo.txt to ./peptide output/CID/6727_2D-MS-PC_turbo/10000 scans
Reading scan number 100



KeyboardInterrupt



In [9]:
name = '7302_2D-PC-MS_1pmol-ul_1AGC_0-7quadiso'
name1 = '6727_2d-PC-MS_1pmol-ul_1AGC_0-7quadiso_HCD'

In [10]:
if args.run_all:
    run(list(datafile_dict)[3], [str(name)], numscanInterval=5000, weights=args.weights, epoch=args.epoch)

path: ./peptide output/HCD/7302_2D-PC-MS_1pmol-ul_1AGC_0-7quadiso/10000 scans
all scans
save in ./peptide output/HCD/7302_2D-PC-MS_1pmol-ul_1AGC_0-7quadiso/10000 scans/save
Reading file ../IC/IC/raw data/HCD/7302_2D-PC-MS_1pmol-ul_1AGC_0-7quadiso.txt to ./peptide output/HCD/7302_2D-PC-MS_1pmol-ul_1AGC_0-7quadiso/10000 scans
Reading scan number 100
Reading scan number 200
Reading scan number 300
Reading scan number 400
Reading scan number 500
Reading scan number 600
Reading scan number 700
Reading scan number 800
Reading scan number 900
Reading scan number 1000
Reading scan number 1100
Reading scan number 1200
Reading scan number 1300
Reading scan number 1400
Reading scan number 1500
Reading scan number 1600
Reading scan number 1700
Reading scan number 1800
Reading scan number 1900
Reading scan number 2000
Reading scan number 2100
Reading scan number 2200
Reading scan number 2300
Reading scan number 2400
Reading scan number 2500
Reading scan number 2600
Reading scan number 2700
Reading 

2023-08-22 04:45:02,373 - root - INFO - The running time of 7302_2D-PC-MS_1pmol-ul_1AGC_0-7quadiso: 3:36:21.177922


sig calculated for feature 3000


In [10]:
import os
import numpy as np
import pC2DMS
import itertools
import gc
import torch

numscan_list = [10000]

for i in numscan_list[::-1]:
    scan_low = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(i) + ' scans/')
    if os.path.exists(os.path.join(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(i) + ' scans/', 'top_indices_tic.npy')):
        indexlist = np.load(os.path.join(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(i) + ' scans/', 'top_indices_tic.npy'))
    else:
        cmap = pC2DMS.PCovMap(scan_low, scan_low.tic(), numScans=i)
        indexlist = cmap.topNfeats(3000)
        topfeat = cmap.sampleFeatsIndex(indexlist)
        topfeat_sorted = topfeat[np.flip(topfeat[:, 3].argsort())]
        np.save(os.path.join(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(i) + ' scans/', 'top_indices_tic.npy'), topfeat_sorted)
        np.savetxt(os.path.join(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(i) + ' scans/', 'top_indices_tic.csv'), topfeat_sorted, fmt='%.2f', delimiter=',')


In [11]:
import dill
import pC2DMS
weights = np.load(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'weights_tpe.npy')
with open(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'optimizer_tpe.dill', "rb") as file:
    optimizer = dill.load(file)
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc([optimizer.best_params[f'w_{i}'] for i in range(len(weights))]), numScans=10000)
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_tpe(loss_func2,bins:100).npy', top_feat)

found 100 good features
found 200 good features
found 300 good features
found 400 good features
found 500 good features
found 600 good features
found 700 good features
found 800 good features
found 900 good features
found 1000 good features
found 1100 good features
found 1200 good features
found 1300 good features
found 1400 good features
found 1500 good features
found 1600 good features
found 1700 good features
found 1800 good features
found 1900 good features
found 2000 good features
found 2100 good features
found 2200 good features
found 2300 good features
found 2400 good features
found 2500 good features
found 2600 good features
found 2700 good features
found 2800 good features
found 2900 good features
found 3000 good features
sig calculated for feature 50
sig calculated for feature 100
sig calculated for feature 150
sig calculated for feature 200
sig calculated for feature 250
sig calculated for feature 300
sig calculated for feature 350
sig calculated for feature 400
sig calculat

In [11]:
import dill
import pC2DMS
weights = np.load(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'weights_tpe.npy')
with open(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'optimizer_tpe_loss_func_1.dill', "rb") as file:
    optimizer = dill.load(file)
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc([optimizer.best_params[f'w_{i}'] for i in range(len(weights))]), numScans=10000)
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_tpe(loss_func1,bins:100).npy', top_feat)

found 100 good features
found 200 good features
found 300 good features
found 400 good features
found 500 good features
found 600 good features
found 700 good features
found 800 good features
found 900 good features
found 1000 good features
found 1100 good features
found 1200 good features
found 1300 good features
found 1400 good features
found 1500 good features
found 1600 good features
found 1700 good features
found 1800 good features
found 1900 good features
found 2000 good features
found 2100 good features
found 2200 good features
found 2300 good features
found 2400 good features
found 2500 good features
found 2600 good features
found 2700 good features
found 2800 good features
found 2900 good features
found 3000 good features
sig calculated for feature 50
sig calculated for feature 100
sig calculated for feature 150
sig calculated for feature 200
sig calculated for feature 250
sig calculated for feature 300
sig calculated for feature 350
sig calculated for feature 400
sig calculat

In [12]:
import dill
import pC2DMS
weights = np.load(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'weights_tpe.npy')
with open(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'optimizer_tpe_loss_func3.dill', "rb") as file:
    optimizer = dill.load(file)
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc([optimizer.best_params[f'w_{i}'] for i in range(len(weights))]), numScans=10000)
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_tpe(loss_func3,bins:100).npy', top_feat)

found 100 good features
found 200 good features
found 300 good features
found 400 good features
found 500 good features
found 600 good features
found 700 good features
found 800 good features
found 900 good features
found 1000 good features
found 1100 good features
found 1200 good features
found 1300 good features
found 1400 good features
found 1500 good features
found 1600 good features
found 1700 good features
found 1800 good features
found 1900 good features
found 2000 good features
found 2100 good features
found 2200 good features
found 2300 good features
found 2400 good features
found 2500 good features
found 2600 good features
found 2700 good features
found 2800 good features
found 2900 good features
found 3000 good features
sig calculated for feature 50
sig calculated for feature 100
sig calculated for feature 150
sig calculated for feature 200
sig calculated for feature 250
sig calculated for feature 300
sig calculated for feature 350
sig calculated for feature 400
sig calculat

In [10]:
import dill
import pC2DMS
weights = np.load(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'weights_tpe.npy')
with open(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'optimizer_tpe_loss_func_4.dill', "rb") as file:
    optimizer = dill.load(file)
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc([optimizer.best_params[f'w_{i}'] for i in range(len(weights))]), numScans=10000)
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_tpe(loss_func4,bins:100).npy', top_feat)

found 100 good features
found 200 good features
found 300 good features
found 400 good features
found 500 good features
found 600 good features
found 700 good features
found 800 good features
found 900 good features
found 1000 good features
found 1100 good features
found 1200 good features
found 1300 good features
found 1400 good features
found 1500 good features
found 1600 good features
found 1700 good features
found 1800 good features
found 1900 good features
found 2000 good features
found 2100 good features
found 2200 good features
found 2300 good features
found 2400 good features
found 2500 good features
found 2600 good features
found 2700 good features
found 2800 good features
found 2900 good features
found 3000 good features
sig calculated for feature 50
sig calculated for feature 100
sig calculated for feature 150
sig calculated for feature 200
sig calculated for feature 250
sig calculated for feature 300
sig calculated for feature 350
sig calculated for feature 400
sig calculat

In [None]:
import dill
import pC2DMS
weights = np.load(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'weights_tpe.npy')
with open(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'optimizer_cma.dill', "rb") as file:
    optimizer = dill.load(file)
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(optimizer.result.xbest), numScans=10000)
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_cma(loss_func2,bins:100).npy', top_feat)

found 100 good features
found 200 good features
found 300 good features
found 400 good features
found 500 good features
found 600 good features
found 700 good features
found 800 good features
found 900 good features
found 1000 good features
found 1100 good features
found 1200 good features
found 1300 good features
found 1400 good features
found 1500 good features
found 1600 good features
found 1700 good features
found 1800 good features
found 1900 good features
found 2000 good features
found 2100 good features
found 2200 good features
found 2300 good features
found 2400 good features
found 2500 good features
found 2600 good features
found 2700 good features
found 2800 good features
found 2900 good features
found 3000 good features
sig calculated for feature 50
sig calculated for feature 100
sig calculated for feature 150
sig calculated for feature 200
sig calculated for feature 250
sig calculated for feature 300
sig calculated for feature 350
sig calculated for feature 400
sig calculat

In [None]:
import dill
import pC2DMS
weights = np.load(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'weights_tpe.npy')
with open(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'optimizer_cma_loss_func1.dill', "rb") as file:
    optimizer = dill.load(file)
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(optimizer.result.xbest), numScans=10000)
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_cma(loss_func1,bins:100).npy', top_feat)

In [None]:
import dill
import pC2DMS
weights = np.load(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'weights_tpe.npy')
with open(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'optimizer_cma_loss_func3.dill', "rb") as file:
    optimizer = dill.load(file)
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(optimizer.result.xbest), numScans=10000)
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_cma(loss_func3,bins:100).npy', top_feat)

In [None]:
import dill
import pC2DMS
weights = np.load(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'weights_tpe.npy')
with open(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'optimizer_cma_loss_func4.dill', "rb") as file:
    optimizer = dill.load(file)
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(optimizer.result.xbest), numScans=10000)
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_cma(loss_func4,bins:100).npy', top_feat)

In [None]:
import dill
import pC2DMS
weights = np.load(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'weights_tpe.npy')
with open(os.path.join('./peptide output', list(datafile_dict.keys())[2] + '/' + str(name1) + '/') + str(10000)+ ' scans/' + 'optimizer_bayopt_loss_func3.dill', "rb") as file:
    optimizer = dill.load(file)
scan1 = pC2DMS.Scan(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/')
cmap = pC2DMS.PCovMap(scan1, scan1.weightFunc(np.array([optimizer.max['params'][f'w_{i}'] for i in range(len(weights))])), numScans=10000)
top_feat = cmap.analyse(3000)
np.save(os.path.join('./peptide output', list(datafile_dict.keys())[3] + '/' + str(name) + '/') + str(10000)+ ' scans/w_top_feat_bayopt(loss_func3,bins:100).npy', top_feat)

In [None]:
stop = datetime.now()
logging.info(f"The running time of the whole file: {stop - start}")