In [None]:
# Standard library
import multiprocessing as mp
import os

# Third-party
import numpy as np

# Project: both helpers come from the same importable `psalign` package.
# Importing Alignment through `src.psalign` only resolves when the kernel's
# working directory is the repository root, while the `psalign.imzml` import
# already requires the package itself to be importable.
from psalign.alignment import Alignment
from psalign.imzml import convert_pyimzml

# Input data: the imzML file has to be downloaded beforehand from
# https://www.ebi.ac.uk/pride/archive/projects/PXD013069
# Replace <path_to_file> with the directory the file was saved to.
path = "<path_to_file>/drugtreatedspheroids-nonormalization.imzML"

# Lower and upper m/z bounds; handed to the imzML conversion and to
# Alignment.limit_mz_range further down.
start_mz, end_mz = 800, 4500

In [2]:
# Split the configured location into its directory (`path`) and file name
# (`sample`); both names are read again when the .npz file is loaded.
# NOTE: `path` is rebound here, so this cell must run exactly once after the
# configuration cell -- a second run would split the directory itself.
path, sample = os.path.split(path)

# Location of the .npz file that is checked for (and loaded later on).
# os.path.join is used instead of f'{path}/...': when the input is a bare file
# name, os.path.split returns '' as the directory and the f-string would
# produce '/<name>', i.e. a path in the filesystem root.
npz_file = os.path.join(path, f'{sample.split(".")[0]}.npz')

# Convert the imzML file only when the .npz file does not exist yet.
if not os.path.exists(npz_file):
    convert_pyimzml(os.path.join(path, sample), low_mz=start_mz, high_mz=end_mz, dtype=np.float32)

In [3]:
# Number of cores passed to Alignment: half of the reported CPUs, but never
# fewer than one. Without the clamp, a single-CPU machine yields
# `1 // 2 == 0` cores.
nb_cores = max(1, mp.cpu_count() // 2)

# Number of peaks handed to Alignment.get_mass_dispersion.
nb_of_peaks = 50

# Candidate values handed to Alignment.get_best_params, which returns one
# selected value for each of the three parameters.
nb_segments = [3, 5]
window = [100, 200, 500, 1000, 2000]
factor = [1.01, 1.02, 1.05, 1.1, 1.2, 1.5, 2]

# Remaining Alignment constructor arguments.
instrument = 'tof'
reference = None

In [4]:
# Read the intensity matrix and the m/z axis from the .npz file. The context
# manager closes the underlying archive as soon as both arrays are in memory;
# a bare np.load() leaves the file handle open for the kernel's lifetime.
# os.path.join (rather than f'{path}/...') keeps a bare file name relative to
# the working directory instead of pointing at the filesystem root.
with np.load(os.path.join(path, f'{sample.split(".")[0]}.npz')) as file:
    data = file['data']
    mz = file['axis']

# The parameter search works on a copy of `data`; the original array is handed
# to a second Alignment instance in the next cell.
# NOTE(review): presumably Alignment may modify its input in place -- confirm.
alignment = Alignment(np.copy(data), mz, reference, nb_cores, instrument)
alignment.limit_mz_range(start_mz, end_mz)

# NOTE: `nb_segments`, `window` and `factor` are rebound from the candidate
# lists to the single selected values. Re-run the parameter cell before
# re-running this one, otherwise the selected values are passed back in as the
# candidates.
nb_segments, window, factor, outlier_detection = alignment.get_best_params(nb_segments, window, factor, nb_of_spectra=100, metric=0, verbose=False)

print(f'Best parameters:\n\tNumber of segments:\t{nb_segments}\n\tFactor:\t\t\t{factor}\n\tWindow:\t\t\t{window}\n\tOutlier detection:\t{outlier_detection}')

Best parameters:
	Number of segments:	5
	Factor:			1.01
	Window:			1000
	Outlier detection:	True


In [5]:
# Compare mass dispersion before alignment, after plain alignment, and after
# alignment with optimization, using the parameters chosen by get_best_params.

# Baseline: mass dispersion of the data before any alignment is applied.
alignment.get_mass_dispersion(nb_of_peaks=nb_of_peaks)
# Plain alignment on the Alignment object created in the previous cell.
alignment.align(nb_segments, window, factor, outlier_detection)
print('Mass dispersion after alignment without optimization:')
alignment.get_mass_dispersion(nb_of_peaks=nb_of_peaks)

# Drop the first Alignment object before the second one is created.
del alignment

# Fresh Alignment for the optimized variant, restricted to the same m/z range.
# NOTE(review): `data` is passed without a copy here; if Alignment modifies its
# input in place, `data` no longer holds the loaded values afterwards -- confirm.
alignment = Alignment(data, mz, reference, nb_cores, instrument)
alignment.limit_mz_range(start_mz, end_mz)

alignment.align_optimization(nb_segments, window, factor, outlier_detection)
print('Mass dispersion after alignment with optimization:')
# Final expression of the cell: its return value is also echoed as the cell
# output, in addition to whatever the method prints itself.
alignment.get_mass_dispersion(nb_of_peaks=nb_of_peaks)


Mass dispersion [ppm]:
	Average:		19.53
	Median:			19.02
Cosine similarity:		0.6532

Compiling Numba functions: finished in 1.81 seconds!        


100%|██████████| 1201/1201 [00:04<00:00, 264.67it/s]


The data was warped in 4.92 seconds.
Mass dispersion after alignment without optimization:

Mass dispersion [ppm]:
	Average:		11.67
	Median:			11.12
Cosine similarity:		0.7972

Compiling Numba functions: finished in 2.1 seconds!        


100%|██████████| 1201/1201 [01:20<00:00, 14.83it/s]


The data was warped in 81.38 seconds.
Mass dispersion after alignment with optimization:

Mass dispersion [ppm]:
	Average:		11.16
	Median:			10.54
Cosine similarity:		0.8079



(11.155086, 10.5391245, 0.80785286)