In [None]:

import multiprocessing as mp
import os
from concurrent.futures import ProcessPoolExecutor

import numpy as np

from src.psalign.alignment import Alignment
from src.psalign.imzml import convert_pyimzml
from src.psalign.mass_dispersion import compute_mass_dispersion

# --- Configuration: every value a reader might want to tune -----------------

# m/z limits handed to the conversion step and to Alignment.limit_mz_range.
start_mz = None
end_mz = None

# Use half of the reported CPUs, but never fewer than one worker:
# cpu_count() // 2 evaluates to 0 on a single-core machine.
nb_cores = max(1, mp.cpu_count() // 2)

# Positional arguments of the Alignment constructor.
instrument = 'orbitrap'
reference = None

# Arguments of the compute_warping_functions* calls.
nb_segments = 3
window = 4
factor = 1.25
outlier_detection = False

# Download .imzML and .ibd file from https://www.ebi.ac.uk/pride/archive/projects/PXD016146
# os.path.join inserts the separator of the running OS, so the os.path.split
# call that follows separates folder and file name on Windows and POSIX alike.
path = os.path.join('<path_to_your_data>', '5mixes_onratliver_50micron.imzML')

In [2]:
# Separate the containing folder from the file name; both names are reused below.
# NOTE: this rebinds `path`, so re-run the configuration cell before re-running this one.
path, sample = os.path.split(path)

# Only convert when the .npz file (presumably written by convert_pyimzml) is missing.
npz_path = f'{path}/{sample.split(".")[0]}.npz'
if not os.path.exists(npz_path):
    convert_pyimzml(
        f'{path}/{sample}',
        low_mz=start_mz,
        high_mz=end_mz,
        dtype=np.float32,
        distance=10,
        nb_of_peaks=1000,
    )

In [3]:
# Settings passed to the mass dispersion computations
# (compute_mass_dispersion and Alignment.get_mass_dispersion).
width = None
distance = 10
nb_peaks = 100

# np.load returns an NpzFile for .npz archives, which keeps the file open until
# it is closed. The context manager closes it once both arrays have been read.
with np.load(f'{path}/{sample.split(".")[0]}.npz') as file:
    data = file['data']
    mz = file['axis']

In [4]:
# Baseline: mass dispersion of the data before any alignment.
# The pool size reuses the configured `nb_cores` rather than recomputing it, so
# this step runs with the same worker count that is passed to Alignment.
with ProcessPoolExecutor(max_workers=nb_cores) as executor:
    # A copy is handed over, presumably so that `data` itself stays unmodified
    # for the following steps - TODO confirm against compute_mass_dispersion.
    mass_dispersion = compute_mass_dispersion(np.copy(data), mz, distance, nb_peaks, width, executor=executor)
print('Mass dispersion before alignment:')
print(f'\nMass dispersion [ppm]:\n\tAverage:\t\t{np.format_float_positional(mass_dispersion[0], precision=2)}\n\tMedian:\t\t\t{np.format_float_positional(mass_dispersion[1], precision=2)}\n\tCosine similarity:\t{np.format_float_positional(mass_dispersion[2], precision=4)}\n')

# Applying sqrt transform can broaden the peaks which can be beneficial for alignment
# NOTE: this overwrites `data`. Re-running this cell without reloading the data
# first takes the square root a second time.
data = np.sqrt(data)

Mass dispersion before alignment:

Mass dispersion [ppm]:
	Average:		0.7
	Median:			0.65
	Cosine similarity:	0.9897



In [5]:
# Coarse alignment: the warping functions are computed on the sqrt-transformed data
npz_path = f'{path}/{sample.split(".")[0]}.npz'

alignment = Alignment(data, mz, reference, nb_cores, instrument)
alignment.limit_mz_range(start_mz, end_mz)
warp_x, warp_y = alignment.compute_warping_functions(nb_segments, window, factor, outlier_detection)

# Put the untransformed data back in place, then warp it with the functions found above
alignment.data = np.load(npz_path)['data']
alignment.apply_warping_functions_to_data(warp_x, warp_y)

print('Mass dispersion after alignment without optimization:')
alignment.get_mass_dispersion(distance=distance, width=width, nb_of_peaks=nb_peaks)

# Drop the reference to the Alignment object before the next run
del alignment

Compiling Numba functions: finished in 2.24 seconds!        


100%|██████████| 23823/23823 [00:09<00:00, 2599.85it/s]


The warping functions were computed in 9.52 seconds.


100%|██████████| 23823/23823 [00:10<00:00, 2262.77it/s]


The data was warped in 10.89 seconds.
Mass dispersion after alignment without optimization:

Mass dispersion [ppm]:
	Average:		0.65
	Median:			0.61
	Cosine similarity:	0.9924



In [6]:
# Optimized alignment: the warping functions are computed on the sqrt-transformed data
npz_path = f'{path}/{sample.split(".")[0]}.npz'

alignment = Alignment(data, mz, reference, nb_cores, instrument)
alignment.limit_mz_range(start_mz, end_mz)
warp_x, warp_y = alignment.compute_warping_functions_optimization(
    nb_segments, window, factor, outlier_detection, delta=0.05, only_opt=True
)

# Put the untransformed data back in place, then warp it with the functions found above
alignment.data = np.load(npz_path)['data']
alignment.apply_warping_functions_to_data(warp_x, warp_y)

print('Mass dispersion after alignment with optimization:')
alignment.get_mass_dispersion(distance=distance, width=width, nb_of_peaks=nb_peaks)

# Drop the reference to the Alignment object before the next run
del alignment

Compiling Numba functions: finished in 1.97 seconds!        


100%|██████████| 23823/23823 [02:24<00:00, 164.46it/s]


The warping functions were computed in 145.25 seconds.


100%|██████████| 23823/23823 [00:10<00:00, 2301.74it/s]


The data was warped in 10.72 seconds.
Mass dispersion after alignment with optimization:

Mass dispersion [ppm]:
	Average:		0.3
	Median:			0.24
	Cosine similarity:	0.9968



In [7]:
# Optimized alignment computed directly on the untransformed data (no square root trick)
npz_path = f'{path}/{sample.split(".")[0]}.npz'

alignment = Alignment(np.load(npz_path)['data'], mz, reference, nb_cores, instrument)
alignment.limit_mz_range(start_mz, end_mz)
warp_x, warp_y = alignment.compute_warping_functions_optimization(
    nb_segments, window, factor, outlier_detection, delta=0.05, only_opt=True
)

# Load a fresh copy of the data from disk before applying the warping functions
alignment.data = np.load(npz_path)['data']
alignment.apply_warping_functions_to_data(warp_x, warp_y)

print('Mass dispersion after alignment with optimization:')
alignment.get_mass_dispersion(distance=distance, width=width, nb_of_peaks=nb_peaks)

# Drop the reference to the Alignment object
del alignment

Compiling Numba functions: finished in 2.05 seconds!        


100%|██████████| 23823/23823 [02:33<00:00, 154.97it/s]


The warping functions were computed in 154.11 seconds.


100%|██████████| 23823/23823 [00:10<00:00, 2285.51it/s]


The data was warped in 10.78 seconds.
Mass dispersion after alignment with optimization:

Mass dispersion [ppm]:
	Average:		0.46
	Median:			0.42
	Cosine similarity:	0.9968



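In this case, the square root trick helps the algorithm achieve a better alignment: with optimization, the average mass dispersion drops to 0.30 ppm when the warping functions are computed on the square-root-transformed data, compared with 0.46 ppm when they are computed on the untransformed data (0.70 ppm before alignment).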