In [12]:
import numpy as np
import matplotlib.pyplot as plt

def lorentzian(x, center, intensity, width):
    """Evaluate a Lorentzian line shape.

    Parameters:
        x: Position(s) at which to evaluate the line (scalar or NumPy array).
        center: Position of the peak maximum.
        intensity: Value of the line at ``x == center``.
        width: Scale of the line; the value falls to ``intensity / 2`` when
            ``abs(x - center) == width``.

    Returns:
        ``intensity / (1 + ((x - center) / width) ** 2)``.
    """
    scaled_offset = (x - center) / width
    return intensity / (1 + scaled_offset ** 2)

def simulate_spectrum(peaks: list):
    '''
    Build a synthetic 1-D NMR spectrum as a sum of Lorentzian lines.

    Each entry of ``peaks`` is a dict with the keys ``'type'``, ``'position'``,
    ``'intensity'`` and ``'width'``.  ``'type'`` selects how many lines are drawn
    around ``'position'``; the peak's ``'intensity'`` is divided equally between
    its lines:

        singlet   -> 1 line  at the position itself
        doublet   -> 2 lines at offsets -0.5, +0.5
        triplet   -> 3 lines at offsets -2/3, 0, +2/3
        quartet   -> 4 lines at offsets -0.75, -0.25, +0.25, +0.75
        multiplet -> 5 lines at offsets -2 .. +2 (step 1), drawn with twice the width

    Entries whose ``'type'`` is none of the above contribute nothing.

    Parameters:
        peaks (list): Peak descriptions as described above.

    Returns:
        tuple: ``(ppm_values, spectrum)`` - the fixed axis
        ``np.linspace(-10, 10, 1000)`` and the summed intensities on that axis.

    Examples:
        peaks_1 = [
            {'type': 'singlet', 'position': 0, 'intensity': 1, 'width': 0.1},
            {'type': 'doublet', 'position': 3, 'intensity': 0.8, 'width': 0.1},
            {'type': 'triplet', 'position': 2, 'intensity': 0.5, 'width': 0.1},
            {'type': 'quartet', 'position': 5, 'intensity': 0.3, 'width': 0.1},
            {'type': 'multiplet', 'position': 7, 'intensity': 0.6, 'width': 0.1}
        ]

        peaks_2 = [
            {'type': 'singlet', 'position': 0.1, 'intensity': 1.8, 'width': 0.1},
            {'type': 'doublet', 'position': 2.98, 'intensity': 1.3, 'width': 0.1},
            {'type': 'triplet', 'position': 2.14, 'intensity': 0.9, 'width': 0.1},
            {'type': 'quartet', 'position': 5.18, 'intensity': 0.3, 'width': 0.1},
            {'type': 'multiplet', 'position': 7.14, 'intensity': 0.6, 'width': 0.1}
        ]

        ppm_values_1, spectrum_1 = simulate_spectrum(peaks_1)
        ppm_values_2, spectrum_2 = simulate_spectrum(peaks_2)

        plt.figure(figsize=(10, 6))
        plt.plot(ppm_values_1, spectrum_1, label='Simulated NMR Spectrum 1')
        plt.plot(ppm_values_2, spectrum_2, label='Simulated NMR Spectrum 2')
        plt.gca().invert_xaxis()  # NMR spectra typically have ppm decreasing from left to right
        plt.xlabel('Chemical Shift (ppm)')
        plt.ylabel('Intensity')
        plt.title('Simulated NMR Spectrum (Unaligned)')
        plt.legend()
        plt.show()
    '''
    # type -> (line offsets relative to the peak position, width multiplier).
    # The number of offsets is the number of lines, so a doublet really has two
    # lines and a quartet four (both were previously drawn with three).
    line_patterns = {
        'singlet': ((0.0,), 1.0),
        'doublet': ((-0.5, 0.5), 1.0),
        'triplet': ((-2 / 3, 0.0, 2 / 3), 1.0),
        'quartet': ((-0.75, -0.25, 0.25, 0.75), 1.0),
        'multiplet': ((-2.0, -1.0, 0.0, 1.0, 2.0), 2.0),
    }

    ppm_values = np.linspace(-10, 10, 1000)
    spectrum = np.zeros_like(ppm_values)

    for peak in peaks:
        pattern = line_patterns.get(peak['type'])
        if pattern is None:
            # Unrecognised types are ignored rather than rejected.
            continue
        offsets, width_scale = pattern
        line_intensity = peak['intensity'] / len(offsets)
        line_width = peak['width'] * width_scale
        for offset in offsets:
            spectrum += lorentzian(ppm_values, peak['position'] + offset, line_intensity, line_width)

    return ppm_values, spectrum

In [None]:
# Three synthetic samples with the same five multiplets; samples 2 and 3 have
# their peak positions displaced relative to sample 1.
peaks_1 = [
    dict(type='singlet', position=0, intensity=1, width=0.1),
    dict(type='doublet', position=3, intensity=0.8, width=0.1),
    dict(type='triplet', position=2, intensity=0.5, width=0.1),
    dict(type='quartet', position=5, intensity=0.3, width=0.1),
    dict(type='multiplet', position=7, intensity=0.6, width=0.1),
]

peaks_2 = [
    dict(type='singlet', position=0.1, intensity=1.8, width=0.1),
    dict(type='doublet', position=2.98, intensity=1.3, width=0.1),
    dict(type='triplet', position=2.14, intensity=0.9, width=0.1),
    dict(type='quartet', position=5.18, intensity=0.3, width=0.1),
    dict(type='multiplet', position=7.14, intensity=0.6, width=0.1),
]

peaks_3 = [
    dict(type='singlet', position=0.1, intensity=0.8, width=0.1),
    dict(type='doublet', position=2.08, intensity=1.2, width=0.1),
    dict(type='triplet', position=2.54, intensity=1.4, width=0.1),
    dict(type='quartet', position=5.98, intensity=0.3, width=0.1),
    dict(type='multiplet', position=7.84, intensity=0.45, width=0.1),
]

# One (axis, intensities) pair per sample; these names are reused by later cells.
ppm_values_1, spectrum_1 = simulate_spectrum(peaks_1)
ppm_values_2, spectrum_2 = simulate_spectrum(peaks_2)
ppm_values_3, spectrum_3 = simulate_spectrum(peaks_3)

# Overlay the three spectra on a single figure.
plt.figure(figsize=(10, 6))
plt.plot(ppm_values_1, spectrum_1, label='Simulated NMR Spectrum 1')
plt.plot(ppm_values_2, spectrum_2, label='Simulated NMR Spectrum 2')
plt.plot(ppm_values_3, spectrum_3, label='Simulated NMR Spectrum 3')
# NMR convention: chemical shift decreases from left to right.
plt.gca().invert_xaxis()
plt.xlabel('Chemical Shift (ppm)')
plt.ylabel('Intensity')
plt.title('Simulated NMR Spectrum (Unalign)')
plt.legend()
plt.show()

In [None]:
# Same three samples as before, but every sample now shares identical peak
# positions; only the intensities differ between them.
peaks_1 = [
    dict(type='singlet', position=0, intensity=1, width=0.1),
    dict(type='doublet', position=3, intensity=0.8, width=0.1),
    dict(type='triplet', position=2, intensity=0.5, width=0.1),
    dict(type='quartet', position=5, intensity=0.3, width=0.1),
    dict(type='multiplet', position=7, intensity=0.6, width=0.1),
]
peaks_2 = [
    dict(type='singlet', position=0, intensity=1.8, width=0.1),
    dict(type='doublet', position=3, intensity=1.3, width=0.1),
    dict(type='triplet', position=2, intensity=0.9, width=0.1),
    dict(type='quartet', position=5, intensity=0.3, width=0.1),
    dict(type='multiplet', position=7, intensity=0.6, width=0.1),
]

peaks_3 = [
    dict(type='singlet', position=0, intensity=0.8, width=0.1),
    dict(type='doublet', position=3, intensity=1.2, width=0.1),
    dict(type='triplet', position=2, intensity=1.4, width=0.1),
    dict(type='quartet', position=5, intensity=0.3, width=0.1),
    dict(type='multiplet', position=7, intensity=0.45, width=0.1),
]

# One (axis, intensities) pair per sample; these names are reused by later cells.
ppm_values_1, spectrum_1 = simulate_spectrum(peaks_1)
ppm_values_2, spectrum_2 = simulate_spectrum(peaks_2)
ppm_values_3, spectrum_3 = simulate_spectrum(peaks_3)

# Overlay the three spectra on a single figure.
plt.figure(figsize=(10, 6))
plt.plot(ppm_values_1, spectrum_1, label='Simulated NMR Spectrum 1')
plt.plot(ppm_values_2, spectrum_2, label='Simulated NMR Spectrum 2')
plt.plot(ppm_values_3, spectrum_3, label='Simulated NMR Spectrum 3')
# NMR convention: chemical shift decreases from left to right.
plt.gca().invert_xaxis()
plt.xlabel('Chemical Shift (ppm)')
plt.ylabel('Intensity')
plt.title('Simulated NMR Spectrum (Aligned)')
plt.legend()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Define a Lorentzian function
def lorentzian(x, center, intensity, width):
    """Return the Lorentzian profile ``intensity / (1 + ((x - center) / width) ** 2)``.

    ``x`` may be a scalar or a NumPy array; the peak maximum (``intensity``) is
    reached at ``x == center`` and half of it at a distance of ``width``.
    """
    reduced_distance = (x - center) / width
    denominator = 1 + reduced_distance ** 2
    return intensity / denominator

# Simulate NMR spectra
def simulate_spectrum(peaks: list):
    """Sum Lorentzian lines for every peak description in ``peaks``.

    Each peak is a dict with ``'type'``, ``'position'``, ``'intensity'`` and
    ``'width'``.  A singlet is one line at full intensity; doublet, triplet,
    quartet and multiplet are 2, 3, 4 and 5 lines around the position, each
    carrying an equal share of the intensity.  Other types add nothing.

    Returns:
        tuple: ``(ppm_values, spectrum)`` on the axis ``np.linspace(-10, 10, 1000)``.
    """
    # Offsets of the individual lines relative to the peak position.
    split_offsets = {
        'doublet': [-0.5, 0.5],
        'triplet': [-1 / 3, 0, 1 / 3],
        'quartet': [-0.75, -0.25, 0.25, 0.75],
        'multiplet': [step * 0.2 for step in range(-2, 3)],
    }

    ppm_values = np.linspace(-10, 10, 1000)  # Chemical shift range
    spectrum = np.zeros_like(ppm_values)

    for peak in peaks:
        kind = peak['type']
        if kind == 'singlet':
            spectrum += lorentzian(ppm_values, peak['position'], peak['intensity'], peak['width'])
            continue
        offsets = split_offsets.get(kind, ())
        for offset in offsets:
            spectrum += lorentzian(
                ppm_values,
                peak['position'] + offset,
                peak['intensity'] / len(offsets),
                peak['width'],
            )

    return ppm_values, spectrum

# Peak tables for three samples whose shifts are displaced relative to each other.
unaligned_peaks_1 = [
    dict(type='singlet', position=0, intensity=1, width=0.1),
    dict(type='doublet', position=3, intensity=0.8, width=0.1),
    dict(type='triplet', position=2, intensity=0.5, width=0.1),
    dict(type='quartet', position=5, intensity=0.3, width=0.1),
    dict(type='multiplet', position=7, intensity=0.6, width=0.1),
]

unaligned_peaks_2 = [
    dict(type='singlet', position=0.1, intensity=1.8, width=0.1),
    dict(type='doublet', position=2.98, intensity=1.3, width=0.1),
    dict(type='triplet', position=2.14, intensity=0.9, width=0.1),
    dict(type='quartet', position=5.18, intensity=0.3, width=0.1),
    dict(type='multiplet', position=7.14, intensity=0.6, width=0.1),
]

unaligned_peaks_3 = [
    dict(type='singlet', position=0.1, intensity=0.8, width=0.1),
    dict(type='doublet', position=2.08, intensity=1.2, width=0.1),
    dict(type='triplet', position=2.54, intensity=1.4, width=0.1),
    dict(type='quartet', position=5.98, intensity=0.3, width=0.1),
    dict(type='multiplet', position=7.84, intensity=0.45, width=0.1),
]

# Simulate the three unaligned spectra (only unaligned data is produced here).
ppm_values_1, spectrum_1 = simulate_spectrum(unaligned_peaks_1)
ppm_values_2, spectrum_2 = simulate_spectrum(unaligned_peaks_2)
ppm_values_3, spectrum_3 = simulate_spectrum(unaligned_peaks_3)

# Overlay the unaligned spectra on one figure.
plt.figure(figsize=(10, 6))
plt.plot(ppm_values_1, spectrum_1, label='Simulated NMR Spectrum 1')
plt.plot(ppm_values_2, spectrum_2, label='Simulated NMR Spectrum 2')
plt.plot(ppm_values_3, spectrum_3, label='Simulated NMR Spectrum 3')
# NMR convention: chemical shift decreases from left to right.
plt.gca().invert_xaxis()
plt.xlabel('Chemical Shift (ppm)')
plt.ylabel('Intensity')
plt.title('Simulated NMR Spectrum (Unaligned)')
plt.legend()
plt.show()

In [None]:
import numpy as np
from scipy.signal import correlate
import matplotlib.pyplot as plt

def align_spectra(reference_spectrum, target_spectrum):
    """
    Aligns the target spectrum to the reference spectrum using cross-correlation.

    The lag that maximises the cross-correlation of the target against the
    reference is the number of samples by which the target is delayed relative
    to the reference, so the target is shifted back by that amount.

    Parameters:
    reference_spectrum (np.ndarray): The reference spectrum to align to.
    target_spectrum (np.ndarray): The spectrum to be aligned.

    Returns:
    np.ndarray: The aligned target spectrum, truncated or zero-padded at the end
        to the length of the reference.  The shift is circular (``np.roll``), so
        samples pushed past one end re-enter at the other.
    """
    # Full cross-correlation; index len(reference) - 1 corresponds to zero lag.
    correlation = correlate(target_spectrum, reference_spectrum, mode='full')

    # Positive lag: the target's features sit `lag` samples to the right of the
    # reference's; negative lag: to the left.
    lag = np.argmax(correlation) - (len(reference_spectrum) - 1)

    # Undo the displacement: move the target by -lag (not +lag, which would
    # double the misalignment).
    aligned_spectrum = np.roll(target_spectrum, -lag)

    # Ensure the aligned spectrum has the same length as the reference spectrum
    reference_length = len(reference_spectrum)
    if len(aligned_spectrum) > reference_length:
        aligned_spectrum = aligned_spectrum[:reference_length]
    else:
        aligned_spectrum = np.pad(aligned_spectrum, (0, reference_length - len(aligned_spectrum)), 'constant')

    return aligned_spectrum

# Example usage:
# Relies on spectrum_1, spectrum_3 and ppm_values_1 left in the kernel by an earlier cell.
# Spectrum 1 is the reference; spectrum 3 is shifted onto it.
aligned_spectrum_3 = align_spectra(spectrum_1, spectrum_3)

# Ensure ppm_values_1 and aligned_spectrum_3 have the same length
# NOTE(review): this rebinds the notebook-level ppm_values_1, while spectrum_1 is
# left untruncated - if the lengths ever differed, the first plot call below would
# be given x and y of different sizes. Confirm the branch is actually reachable.
if len(ppm_values_1) != len(aligned_spectrum_3):
    min_length = min(len(ppm_values_1), len(aligned_spectrum_3))
    ppm_values_1 = ppm_values_1[:min_length]
    aligned_spectrum_3 = aligned_spectrum_3[:min_length]

# Overlay the reference and the aligned spectrum on the reference's ppm axis.
plt.figure(figsize=(10, 6))
plt.plot(ppm_values_1, spectrum_1, label='Reference Spectrum 1')
plt.plot(ppm_values_1, aligned_spectrum_3, label='Aligned Spectrum 3')
plt.gca().invert_xaxis()  # NMR spectra typically have ppm decreasing from left to right
plt.xlabel('Chemical Shift (ppm)')
plt.ylabel('Intensity')
plt.title('Aligned NMR Spectrum')
plt.legend()
plt.show()

In [None]:
import numpy as np
from scipy.signal import correlate
import matplotlib.pyplot as plt

def segment_spectrum(spectrum, segment_size):
    """
    Cut a spectrum into consecutive, non-overlapping chunks.

    Parameters:
    spectrum (np.ndarray): The spectrum to be segmented.
    segment_size (int): Number of points per chunk; the final chunk is shorter
        when the length of ``spectrum`` is not a multiple of ``segment_size``.

    Returns:
    list: The chunks, in their original order.
    """
    segments = []
    for start in range(0, len(spectrum), segment_size):
        stop = start + segment_size
        segments.append(spectrum[start:stop])
    return segments

def align_segment(reference_segment, target_segment):
    """
    Aligns a target segment to a reference segment using cross-correlation.

    The lag that maximises the cross-correlation of the target against the
    reference is how far the target is delayed relative to the reference; the
    target is shifted back by that lag.

    Parameters:
    reference_segment (np.ndarray): The reference segment to align to.
    target_segment (np.ndarray): The segment to be aligned.

    Returns:
    np.ndarray: The aligned target segment (same length as ``target_segment``).
        The shift is circular (``np.roll``), so samples pushed past one end of the
        segment re-enter at the other.
    """
    # Index len(reference) - 1 of the full cross-correlation is zero lag.
    correlation = correlate(target_segment, reference_segment, mode='full')
    lag = np.argmax(correlation) - (len(reference_segment) - 1)
    # Shift by -lag to undo the displacement; +lag would double it.
    aligned_segment = np.roll(target_segment, -lag)
    return aligned_segment

def align_spectra(reference_spectrum, target_spectrum, segment_size):
    """
    Align a target spectrum to a reference one segment at a time.

    Both spectra are cut into chunks of ``segment_size`` points; each target
    chunk is aligned to the reference chunk at the same position and the
    results are joined back together.

    Parameters:
    reference_spectrum (np.ndarray): The reference spectrum to align to.
    target_spectrum (np.ndarray): The spectrum to be aligned.
    segment_size (int): The size of each segment.

    Returns:
    np.ndarray: The aligned target spectrum, truncated or zero-padded at the end
        to the length of the reference.
    """
    segment_pairs = zip(
        segment_spectrum(reference_spectrum, segment_size),
        segment_spectrum(target_spectrum, segment_size),
    )

    aligned_segments = []
    for ref_seg, tgt_seg in segment_pairs:
        shortfall = len(ref_seg) - len(tgt_seg)
        if shortfall > 0:
            # Zero-pad a short target chunk up to the reference chunk's length.
            tgt_seg = np.pad(tgt_seg, (0, shortfall), 'constant')
        aligned_segments.append(align_segment(ref_seg, tgt_seg))

    aligned_spectrum = np.concatenate(aligned_segments)

    # Force the output length to match the reference.
    wanted_length = len(reference_spectrum)
    if len(aligned_spectrum) > wanted_length:
        return aligned_spectrum[:wanted_length]
    return np.pad(aligned_spectrum, (0, wanted_length - len(aligned_spectrum)), 'constant')

# Example usage:
# Relies on spectrum_1, spectrum_3 and ppm_values_1 left in the kernel by an earlier cell.
segment_size = 100  # Points per segment; each segment is aligned independently
aligned_spectrum_3 = align_spectra(spectrum_1, spectrum_3, segment_size)

# Ensure ppm_values_1 and aligned_spectrum_3 have the same length
# NOTE(review): this rebinds the notebook-level ppm_values_1 but leaves spectrum_1
# at its original length; confirm the two can never differ before relying on it.
min_length = min(len(ppm_values_1), len(aligned_spectrum_3))
ppm_values_1 = ppm_values_1[:min_length]
aligned_spectrum_3 = aligned_spectrum_3[:min_length]

# Overlay the reference and the segment-wise aligned spectrum.
plt.figure(figsize=(10, 6))
plt.plot(ppm_values_1, spectrum_1, label='Reference Spectrum 1')
plt.plot(ppm_values_1, aligned_spectrum_3, label='Aligned Spectrum 3')
plt.gca().invert_xaxis()  # NMR spectra typically have ppm decreasing from left to right
plt.xlabel('Chemical Shift (ppm)')
plt.ylabel('Intensity')
plt.title('Aligned NMR Spectrum')
plt.legend()
plt.show()

In [None]:
!pip install dtaidistance

In [8]:
import pandas as pd

In [10]:
# Read the spectra table and discard its last column in a single expression.
df = pd.read_csv('./test.csv').iloc[:, :-1]

In [None]:
from dtaidistance import dtw
import numpy as np
import matplotlib.pyplot as plt

# Example spectra (replace with your actual spectra data)
# Uses spectrum_1 and spectrum_3 left in the kernel by an earlier cell.
spectra1 = np.array(spectrum_1)
spectra2 = np.array(spectrum_3)  # note: this is spectrum 3, plotted below as "Spectra 2"

# Compute DTW distance
distance = dtw.distance(spectra1, spectra2)
print(f"DTW distance: {distance}")

# Optionally, align the spectra (this part can be more involved depending on your needs)
# NOTE(review): `alignment` is computed but not used anywhere in this cell.
alignment = dtw.warping_path(spectra1, spectra2)

# Plot the two spectra against their sample index (the warping path is not drawn)
plt.plot(spectra1, label="Spectra 1")
plt.plot(spectra2, label="Spectra 2")
plt.legend()
plt.show()

In [1]:
import pandas as pd

# Read the spectra table without its last column; the remaining column labels
# are parsed as floats to form the ppm axis.
df = pd.read_csv('./test.csv').iloc[:, :-1]
ppm = df.columns.astype(float)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from dtaidistance import dtw
from functools import partial

# Load the spectra table, dropping its last column.
df = pd.read_csv('/Users/aeiwz/Github/metbit/metbit/dev/test.csv').iloc[:, :-1]

# Column labels become the ppm axis; the table body becomes a 2-D array with one
# spectrum per row.
ppm_values = df.columns.astype(float)
spectra = df.to_numpy()

# Reference spectrum: point-wise median over all spectra.
ref = np.median(spectra, axis=0)

# Function to downsample data
def downsample(array, factor):
    """Return every ``factor``-th element of ``array``, starting with the first."""
    every_nth = slice(None, None, factor)
    return array[every_nth]

# Wrap the downsample function to include the factor
# (np.apply_along_axis passes only the 1-D slice, so the factor is bound here)
downsample_factor = 2  # Adjust as needed
downsample_partial = partial(downsample, factor=downsample_factor)

# Apply downsampling
# Each row of `spectra` is thinned independently; the reference and the ppm axis
# are thinned with the same factor so all three stay index-compatible.
spectra_downsampled = np.apply_along_axis(downsample_partial, 1, spectra)
ref_downsampled = downsample(ref, downsample_factor)
ppm_values_downsampled = downsample(ppm_values, downsample_factor)

# Function to align a spectrum using DTW with window constraint
def align_spectrum(reference, target, window=None):
    """Warp ``target`` onto the index grid of ``reference`` using a DTW path.

    Parameters:
        reference: Sequence that defines the output grid.
        target: Sequence whose values are re-indexed.
        window: Forwarded unchanged to ``dtw.warping_path``.

    Returns:
        Array shaped like ``reference``. For every ``(i, j)`` pair on the warping
        path, position ``i`` receives ``target[j]``; when several pairs share the
        same ``i``, the last one on the path wins.
    """
    path = dtw.warping_path(reference, target, use_c=True, window=window)
    warped = np.zeros_like(reference)
    for ref_pos, tgt_pos in path:
        warped[ref_pos] = target[tgt_pos]
    return warped

# Perform alignment on downsampled data
window = int(len(ref_downsampled) * 0.1)  # 10% of the spectrum length
aligned_spectra_downsampled = np.zeros_like(spectra_downsampled)

# Align every downsampled spectrum to the downsampled reference, row by row.
for i in range(spectra_downsampled.shape[0]):
    aligned_spectra_downsampled[i] = align_spectrum(ref_downsampled, spectra_downsampled[i], window=window)

# Reconstruct the full-resolution aligned spectra (if needed)
# Linear interpolation from the downsampled ppm grid back onto the full grid.
# NOTE(review): np.interp expects its sample x-coordinates (ppm_values_downsampled)
# to be increasing - confirm the CSV columns are stored in ascending ppm order.
aligned_spectra = np.zeros_like(spectra)
for i in range(spectra.shape[0]):
    aligned_spectra[i] = np.interp(ppm_values, ppm_values_downsampled, aligned_spectra_downsampled[i])

# Plot original and aligned spectra
plt.figure(figsize=(12, 8))

# Plot original spectra
plt.subplot(2, 1, 1)
for i in range(spectra.shape[0]):
    plt.plot(ppm_values, spectra[i], label=f'Unaligned Spectrum {i+1}')
plt.gca().invert_xaxis()
plt.title('Original Spectra (Unaligned)')
plt.xlabel('Chemical Shift (ppm)')
plt.ylabel('Intensity')
plt.legend()

# Plot aligned spectra
plt.subplot(2, 1, 2)
for i in range(aligned_spectra.shape[0]):
    plt.plot(ppm_values, aligned_spectra[i], label=f'Aligned Spectrum {i+1}')
plt.gca().invert_xaxis()
plt.title('Aligned Spectra')
plt.xlabel('Chemical Shift (ppm)')
plt.ylabel('Intensity')
plt.legend()

plt.tight_layout()
plt.show()

# Save aligned spectra to CSV for further analysis
# Columns are labelled with the ppm values; the row index is not written.
aligned_df = pd.DataFrame(aligned_spectra, columns=ppm_values)
aligned_df.to_csv('/Users/aeiwz/Github/metbit/metbit/dev/aligned_spectra.csv', index=False)

In [None]:
# Sanity check: display the shape of the aligned matrix produced by an earlier cell.
aligned_spectra.shape

In [3]:
from lingress import plot_NMR_spec

In [None]:
# Plot the original (unaligned) spectra with lingress, without group labels.
# Relies on `spectra` and `ppm_values` left in the kernel by an earlier cell.
plot_NMR_spec(spectra=spectra, ppm = ppm_values, label=None).single_spectra()

In [None]:
# Plot the aligned spectra with lingress for comparison with the originals.
# Relies on `aligned_spectra` and `ppm_values` left in the kernel by an earlier cell.
plot_NMR_spec(spectra=aligned_spectra, ppm = ppm_values, label=None).single_spectra()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from fastdtw import fastdtw  # Faster DTW implementation
from scipy.spatial.distance import euclidean
from multiprocessing import Pool

def align_spectrum_segment(reference, target, start_idx, end_idx):
    """Warp one slice of ``target`` onto the same slice of ``reference``.

    Both inputs are cut to ``[start_idx:end_idx]`` and a warping path between the
    two slices is obtained from ``fastdtw`` (approximate DTW, Euclidean distance).

    Returns:
        Array shaped like the reference slice. For each ``(i, j)`` pair on the
        path, position ``i`` receives the target slice's value at ``j``; the last
        pair for a given ``i`` wins.
    """
    ref_slice = reference[start_idx:end_idx]
    tgt_slice = target[start_idx:end_idx]
    # Only the path is needed; the accumulated distance is discarded.
    _, warp_path = fastdtw(ref_slice, tgt_slice, dist=euclidean)
    warped = np.zeros_like(ref_slice)
    for ref_pos, tgt_pos in warp_path:
        warped[ref_pos] = tgt_slice[tgt_pos]
    return warped

# Function to process each spectrum segment
def align_spectrum_in_segments(index, ref, spectra, num_segments=10):
    """Align ``spectra[index]`` to ``ref`` in ``num_segments`` consecutive pieces.

    The axis is divided into pieces of ``len(ref) // num_segments`` points; the
    final piece extends to the end of ``ref`` so no trailing points are dropped.
    Each piece is aligned independently with ``align_spectrum_segment``.

    Returns:
        Array shaped like ``ref`` holding the aligned spectrum.
    """
    total_points = len(ref)
    aligned_spectrum = np.zeros_like(ref)
    segment_length = total_points // num_segments
    final_segment = num_segments - 1

    for segment in range(num_segments):
        start_idx = segment * segment_length
        # The final piece absorbs the remainder of the integer division.
        end_idx = total_points if segment == final_segment else start_idx + segment_length
        aligned_spectrum[start_idx:end_idx] = align_spectrum_segment(
            ref, spectra[index], start_idx, end_idx
        )

    return aligned_spectrum

# Parallel processing for faster alignment using multiprocessing
def parallel_align(index, ref, spectra):
    """Worker entry point: align ``spectra[index]`` to ``ref``.

    Delegates to ``align_spectrum_in_segments`` with its default number of
    segments and returns that function's result.
    """
    return align_spectrum_in_segments(index, ref, spectra)

def main(input_path='/Users/aeiwz/Github/metbit/metbit/dev/test.csv',
         output_path='/Users/aeiwz/Github/metbit/metbit/dev/aligned_spectra.csv',
         num_cores=4):
    """Align every spectrum in a CSV to the median spectrum, plot and save the result.

    Parameters:
        input_path: CSV with one spectrum per row and ppm values as column labels;
            its last column is dropped before processing.
        output_path: Destination CSV for the aligned spectra (written without index).
        num_cores: Number of worker processes used for the alignment.

    The defaults reproduce the locations and worker count that used to be
    hard-coded, so calling ``main()`` with no arguments behaves as before.
    """
    # Load the data
    df = pd.read_csv(input_path)
    df = df.iloc[:, :-1]
    ppm_values = df.columns.astype(float)
    spectra = df.to_numpy()

    # Compute the reference spectrum as the median
    ref = np.median(spectra, axis=0)

    # Align all spectra with a single worker pool.  The pool is created once
    # (instead of once per batch of 10 spectra) and starmap returns the results
    # in submission order, so row i of the output is spectrum i.
    tasks = [(index, ref, spectra) for index in range(spectra.shape[0])]
    with Pool(processes=num_cores) as pool:
        aligned_spectra = np.array(pool.starmap(parallel_align, tasks))

    # Plot original (top) and aligned (bottom) spectra
    plt.figure(figsize=(12, 8))
    panels = (
        (1, spectra, 'Unaligned Spectrum', 'Original Spectra (Unaligned)'),
        (2, aligned_spectra, 'Aligned Spectrum', 'Aligned Spectra'),
    )
    for position, data, label_prefix, title in panels:
        plt.subplot(2, 1, position)
        for i in range(data.shape[0]):
            plt.plot(ppm_values, data[i], label=f'{label_prefix} {i+1}')
        plt.gca().invert_xaxis()
        plt.title(title)
        plt.xlabel('Chemical Shift (ppm)')
        plt.ylabel('Intensity')
        plt.legend()

    plt.tight_layout()
    plt.show()

    # Save aligned spectra to CSV for further analysis
    aligned_df = pd.DataFrame(aligned_spectra, columns=ppm_values)
    aligned_df.to_csv(output_path, index=False)

if __name__ == "__main__":
    main()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from fastdtw import fastdtw
from scipy.spatial.distance import euclidean
from concurrent.futures import ProcessPoolExecutor

def align_spectrum_segment(reference, target, start_idx, end_idx):
    """Align the ``[start_idx:end_idx]`` window of ``target`` to that of ``reference``.

    A warping path between the two windows is computed with ``fastdtw``
    (approximate DTW, Euclidean distance).  The output has the shape of the
    reference window; for each ``(i, j)`` on the path, output position ``i``
    takes the target window's value at ``j`` (later pairs overwrite earlier ones).
    """
    reference_window = reference[start_idx:end_idx]
    target_window = target[start_idx:end_idx]
    # fastdtw returns (distance, path); only the path is used.
    _, path = fastdtw(reference_window, target_window, dist=euclidean)
    result = np.zeros_like(reference_window)
    for out_pos, src_pos in path:
        result[out_pos] = target_window[src_pos]
    return result

# Function to process each spectrum segment
def align_spectrum_in_segments(index, ref, spectra, num_segments=10):
    """Piece-wise alignment of ``spectra[index]`` against ``ref``.

    ``ref`` is split into ``num_segments`` windows of ``len(ref) // num_segments``
    points, with the last window running to the end of ``ref``.  Every window is
    aligned on its own via ``align_spectrum_segment`` and written into the
    matching slice of the output, which has the shape of ``ref``.
    """
    n_points = len(ref)
    aligned_spectrum = np.zeros_like(ref)
    segment_length = n_points // num_segments
    last_segment = num_segments - 1

    for seg in range(num_segments):
        start_idx = seg * segment_length
        # The last window picks up any points left over by the floor division.
        end_idx = n_points if seg == last_segment else start_idx + segment_length
        aligned_spectrum[start_idx:end_idx] = align_spectrum_segment(
            ref, spectra[index], start_idx, end_idx
        )

    return aligned_spectrum

# Parallel processing for faster alignment using ProcessPoolExecutor
def parallel_align(index, ref, spectra):
    """Task submitted to the executor: align ``spectra[index]`` to ``ref``.

    Delegates to ``align_spectrum_in_segments`` with its default number of
    segments and returns that function's result.
    """
    return align_spectrum_in_segments(index, ref, spectra)

def main():
    """Align all spectra in ``test.csv`` to their median, then plot and save them.

    The last CSV column is dropped, the remaining column labels form the ppm
    axis, and each row is aligned in a separate executor task.  Any exception
    raised along the way is caught and reported with a single printed line.
    """
    try:
        # Load the table without its last column.
        table = pd.read_csv('/Users/aeiwz/Github/metbit/metbit/dev/test.csv').iloc[:, :-1]
        ppm_values = table.columns.astype(float)
        spectra = table.to_numpy()

        # Reference spectrum: point-wise median of all rows.
        ref = np.median(spectra, axis=0)

        num_cores = 4  # Adjust based on your system's CPU cores

        # One task per spectrum, submitted in row order; results are collected
        # in the same order so row i of the output is spectrum i.
        with ProcessPoolExecutor(max_workers=num_cores) as executor:
            futures = [
                executor.submit(parallel_align, index, ref, spectra)
                for index in range(spectra.shape[0])
            ]
            collected = [future.result() for future in futures]

        aligned_spectra = np.array(collected)

        # Top panel: originals; bottom panel: aligned spectra.
        plt.figure(figsize=(12, 8))
        panels = (
            (1, spectra, 'Unaligned Spectrum', 'Original Spectra (Unaligned)'),
            (2, aligned_spectra, 'Aligned Spectrum', 'Aligned Spectra'),
        )
        for position, data, label_prefix, title in panels:
            plt.subplot(2, 1, position)
            for row in range(data.shape[0]):
                plt.plot(ppm_values, data[row], label=f'{label_prefix} {row+1}')
            plt.gca().invert_xaxis()
            plt.title(title)
            plt.xlabel('Chemical Shift (ppm)')
            plt.ylabel('Intensity')
            plt.legend()

        plt.tight_layout()
        plt.show()

        # Save aligned spectra to CSV for further analysis
        aligned_df = pd.DataFrame(aligned_spectra, columns=ppm_values)
        aligned_df.to_csv('/Users/aeiwz/Github/metbit/metbit/dev/aligned_spectra.csv', index=False)

    except Exception as e:
        print(f"Error occurred: {e}")

if __name__ == "__main__":
    main()

In [None]:
# Inline (non-function) run of the parallel alignment pipeline.
# Relies on pd, np, plt, ProcessPoolExecutor and parallel_align from earlier cells.
df = pd.read_csv('/Users/aeiwz/Github/metbit/metbit/dev/test.csv')
df = df.iloc[:, :-1]  # drop the last column
ppm_values = df.columns.astype(float)  # column labels form the ppm axis
spectra = df.to_numpy()  # one spectrum per row

# Compute the reference spectrum as the median
ref = np.median(spectra, axis=0)

# Perform alignment on full-resolution data with parallel processing
num_cores = 4  # Adjust based on your system's CPU cores
aligned_spectra = []

# Use concurrent.futures to align spectra in parallel (batch processing)
# Every index is submitted before any result is awaited, so the batches only
# group the submission loop; they do not limit how many tasks are queued.
batch_size = 10  # Process 10 spectra at a time
with ProcessPoolExecutor(max_workers=num_cores) as executor:
    futures = []
    for i in range(0, spectra.shape[0], batch_size):
        batch = range(i, min(i + batch_size, spectra.shape[0]))
        for index in batch:
            futures.append(executor.submit(parallel_align, index, ref, spectra))
    
    # Collect aligned spectra from futures
    # Results are read in submission order, so row i corresponds to spectrum i.
    for future in futures:
        aligned_spectra.append(future.result())

aligned_spectra = np.array(aligned_spectra)

# Plot original and aligned spectra
plt.figure(figsize=(12, 8))

# Plot original spectra
plt.subplot(2, 1, 1)
for i in range(spectra.shape[0]):
    plt.plot(ppm_values, spectra[i], label=f'Unaligned Spectrum {i+1}')
plt.gca().invert_xaxis()
plt.title('Original Spectra (Unaligned)')
plt.xlabel('Chemical Shift (ppm)')
plt.ylabel('Intensity')
plt.legend()

# Plot aligned spectra
plt.subplot(2, 1, 2)
for i in range(aligned_spectra.shape[0]):
    plt.plot(ppm_values, aligned_spectra[i], label=f'Aligned Spectrum {i+1}')
plt.gca().invert_xaxis()
plt.title('Aligned Spectra')
plt.xlabel('Chemical Shift (ppm)')
plt.ylabel('Intensity')
plt.legend()

plt.tight_layout()
plt.show()

# Save aligned spectra to CSV for further analysis
# Columns are labelled with the ppm values; the row index is not written.
aligned_df = pd.DataFrame(aligned_spectra, columns=ppm_values)
aligned_df.to_csv('/Users/aeiwz/Github/metbit/metbit/dev/aligned_spectra.csv', index=False)

In [14]:
import pandas as pd
from lingress import plot_NMR_spec

In [15]:
# Original spectra: drop the last column, then take the ppm axis from the
# remaining column labels.
df1 = pd.read_csv('/Users/aeiwz/Github/metbit/metbit/dev/test.csv').iloc[:, :-1]
ppm = df1.columns.astype(float)

# Aligned spectra, as written to disk by the alignment step.
df2 = pd.read_csv('/Users/aeiwz/Github/metbit/metbit/dev/aligned_spectra.csv')


In [None]:
# NOTE(review): `plo` looks like an abandoned, partially typed call (presumably
# `plot_NMR_spec`). No such name is defined in the visible notebook, so running
# this cell would raise NameError - delete the cell once confirmed.
plo

In [None]:
# Plot the original spectra (df1) on the ppm axis with lingress, without group labels.
plot_NMR_spec(spectra=df1, ppm = ppm, label=None).single_spectra()

In [None]:
# Plot the aligned spectra (df2) on the same ppm axis for comparison.
# NOTE(review): `ppm` comes from df1's columns; confirm df2 has the same number of columns.
plot_NMR_spec(spectra=df2, ppm = ppm, label=None).single_spectra()

In [1]:
import pandas as pd
import numpy as np

# Random data, 100 rows x 1000 columns, uniform on [0, 1).  A seeded generator is
# used so that re-running the cell reproduces the same frame.
df = pd.DataFrame(np.random.default_rng(0).random((100, 1000)))


In [3]:
# Show five randomly chosen rows as a quick look at the frame.
df.sample(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
82,0.796301,0.542752,0.879767,0.414274,0.852321,0.962625,0.364203,0.568212,0.753244,0.97779,...,0.08803,0.160424,0.540365,0.595766,0.778216,0.797282,0.394352,0.677654,0.846243,0.515333
59,0.441169,0.311394,0.967183,0.116487,0.850167,0.459365,0.421279,0.790957,0.904389,0.951303,...,0.401897,0.975459,0.46486,0.334351,0.316639,0.301374,0.97111,0.036026,0.527075,0.690774
1,0.422536,0.348928,0.213346,0.894253,0.728559,0.804165,0.519382,0.575276,0.737128,0.634928,...,0.924129,0.692955,0.803428,0.780486,0.658131,0.982438,0.032135,0.142376,0.003578,0.289351
18,0.83607,0.834373,0.66303,0.902761,0.033723,0.99107,0.224448,0.37166,0.411484,0.09774,...,0.636109,0.354969,0.240294,0.233445,0.127109,0.79413,0.098487,0.581847,0.512348,0.324953
56,0.151065,0.182319,0.31786,0.877142,0.212846,0.582252,0.544011,0.18796,0.313838,0.009722,...,0.205116,0.911063,0.001677,0.074687,0.217369,0.856486,0.644676,0.731753,0.322994,0.770325


In [12]:
# Deduplicate the same values once as ints and once as strings.
d = [1, 23, 25, 4, 6, 4, 1, 1, 2]
f = list(map(str, d))
# Sets carry no ordering guarantee, so the two result lists may come out in different orders.
list(set(d)), list(set(f))

([1, 2, 4, 6, 23, 25], ['4', '2', '1', '23', '25', '6'])

In [19]:
import numpy as np

In [21]:
import pandas as pd 

# Download the example NMR dataset (requires network access).
df = pd.read_csv("https://raw.githubusercontent.com/aeiwz/example_data/main/dataset/Example_NMR_data.csv")
# Everything after the first column is treated as spectral intensities.
spectra = df.iloc[:,1:]
# The remaining column labels are ppm values stored as strings; parse them as floats.
ppm = spectra.columns.astype(float)

In [22]:
# Inspect the column labels of the loaded table.
df.columns

Index(['Group', '0.60075', '0.60125', '0.60175', '0.60225', '0.60275',
       '0.60325', '0.60375', '0.60425', '0.60475',
       ...
       '4.20125', '4.20175', '4.20225', '4.20275', '4.20325', '4.20375',
       '4.20425', '4.20475', '4.20525', '4.20575'],
      dtype='object', length=7212)

In [30]:

# Find the position, within the ppm axis, of the value closest to 4.201.
test = ppm.get_loc(min(ppm, key=lambda shift: abs(shift - 4.201)))

In [31]:
# Display the index found for the ppm value nearest 4.201.
test

7200