In [1]:
import numpy as np
import copy
import pickle

from scipy import ndimage
from BaselineRemoval import BaselineRemoval
import matplotlib.pyplot as plt
import os
from scipy import interpolate
from scipy import signal
from scipy.fft import fft, ifft, fftfreq
from sklearn.decomposition import PCA

# Default figure size (inches) for every matplotlib figure created in this notebook.
plt.rcParams['figure.figsize'] = (20.0, 10.0)
# plt.rcParams['figure.figsize'] = (10.0, 5.0)
# 500 dpi combined with the 20 x 10 inch size above renders figures at 10000 x 5000 pixels.
plt.rcParams['figure.dpi'] = 500
from IPython.display import display, HTML
# Inject CSS that sets elements with class `container` to 98% width.
display(HTML("<style>.container { width:98% !important; }</style>"))

Mogelijke verbeteringen:
 - maak de covariantie afhankelijk van de noise
 - draai het spectrum zodanig dat de globale slope langs de x-as ligt, zodat het min-max-algoritme beter werkt. DONE
 - de smoothing die door de covariantie wordt toegepast moet per sample afgesteld worden

Problemen:
 - de initialisatie maakt uit voor de Kalman smoother (hoeveel noise er is vs. Raman)
 - het aantal iteraties maakt uit voor de Kalman smoother (hoeveel noise er is vs. Raman)
 - de boundaries hebben geen invloed op de Kalman smoother, behalve dat ze alles binnen de boundaries houden,
   tenzij smoothing ook aanstaat.
   
   
De excitatielaser maakt uit hoe het Raman-signaal eruitziet. Hoger (rood) betekent een lager en een breder Raman-signaal.

date: 2-9-2022

 - stabilise the logic part with neighbourhood information
 - where the logic part is true, make the line segments bigger (does not work)
 - find something to determine the smoothness of the poly fit (no bumps) DONE


### Ideas:

- Wavelet transform

### Errors:

 - smooth_grad: fix the try/except clause so that it also works at the beginning and end of the spectrum
 - when using the Fourier transform, the Gibbs phenomenon seems to be a problem


In [2]:
# Directory holding FileNames.npy, Wavenumbers.npy and one .npy file per sample.
file_location = "../data/Raman_Mouse/corrected_4_wavenumbers/"
# file_location = "../data/Green_excitation/corrected_4_wavenumbers/"

filenames = np.load(f"{file_location}FileNames.npy")

# The label pickle is stored one directory above `file_location`
# (the trailing slash yields an empty last path component, hence [:-2]).
dataset_root = '/'.join(file_location.split('/')[:-2])
# NOTE: pickle.load executes arbitrary code from the file; only use trusted label files.
with open(f"{dataset_root}/Sample_labels.pickle", 'rb') as f:
    labels = pickle.load(f)
# Re-key the labels by the part of the file name before the first dot.
labels = dict((name.split(".")[0], val) for name, val in labels.items())

wavenumbers = np.load(f"{file_location}Wavenumbers.npy")

# `data` is a list of (spectra array, label) tuples, in the order of `filenames`.
data = []
for f in filenames:
    stem = f.split('.')[0]
    x = np.load(f"{file_location}{stem}.npy")
    # Flatten to (-1, last axis) and restore the original shape (shape is unchanged).
    flat = x.reshape(-1, x.shape[-1])
    data.append((flat.reshape(x.shape), labels[stem]))
    
# #mouse image
# noise_amount = 1e5
# intervals = 10
# general_noise = 10e6
# poly_fit = 1e5
# precision = 12
# continue_gap = 100
# # green image
# noise_amount = 3e5
# general_noise = 5e6
# intervals = 30
# precision = 20
# poly_fit = 8e5
# continue_gap = 400

In [9]:
def remove_noise_fft_min(min_HWHM=5, spline_appr=5e9):
    """Build a denoiser that smooths the central part of a signal's FFT with a spline.

    Parameters
    ----------
    min_HWHM : float
        Divides half the span of the module-level ``wavenumbers`` to obtain ``k``,
        the number of Fourier coefficients at each end of the FFT output that are
        left untouched.
    spline_appr : float
        Smoothing factor ``s`` passed to ``interpolate.UnivariateSpline`` when
        smoothing the real and imaginary parts of the spectrum.

    Returns
    -------
    callable
        ``remove_noise_fft(x)`` taking a 1-D signal and returning the inverse FFT of
        the smoothed coefficients plus the original mean. The result is the raw
        ``ifft`` output and therefore complex-valued.
    """
    # `wavenumbers` is read once, when the factory is called.
    k = int((wavenumbers[-1] - wavenumbers[0]) / (2*min_HWHM))

    def remove_noise_fft(x):
        x = np.asarray(x)
        mean = np.mean(x)
        # Subtract out of place: an in-place `-=` on a copy of an integer array
        # would fail because the float mean cannot be cast back to the int dtype.
        centred = x - mean
        fourier = fft(centred)

        fourier_real = fourier.real
        fourier_imag = fourier.imag

        n = x.shape[0]
        space = np.arange(n)
        # Use an explicit upper index instead of `-k`: with k == 0 the slice
        # `[0:-0]` is empty and nothing would be smoothed at all.
        upper = n - k

        # Real and imaginary parts are smoothed independently with a quartic spline;
        # only the coefficients in [k, n-k) are replaced by the spline values.
        func = interpolate.UnivariateSpline(space, fourier_real, k=4, s=spline_appr)
        fourier_real[k:upper] = func(space[k:upper])

        func = interpolate.UnivariateSpline(space, fourier_imag, k=4, s=spline_appr)
        fourier_imag[k:upper] = func(space[k:upper])

        smooth_fourier = fourier_real + 1j * fourier_imag

        return ifft(smooth_fourier) + mean
    return remove_noise_fft
remove_noise_fft = remove_noise_fft_min()

In [10]:
def gaussian(x, mu, sigma, scale):
    """Evaluate a scaled Gaussian bump and zero out its low-amplitude tails.

    The curve ``scale * exp(-0.5 * ((x - mu) / sigma)**2)`` is computed, after which
    every value that is not strictly greater than ``min(scale/10, 10)`` is set to 0.
    """
    standardised = (x - mu) / sigma
    values = scale * np.exp(-0.5* standardised**2)
    cutoff = min(scale/10, 10)
    keep = values > cutoff
    return values * keep

def _fit_scale_below(x, mu, sigma):
    """Search the amplitude of a Gaussian (fixed mu, sigma) that stays at or below ``x``.

    Starting at amplitude 10, the amplitude is increased in steps of 1e10, 1e9, ..., 1.
    For each step size it grows until at least one sample of the curve exceeds ``x``
    and is then reduced by one step (the reduction is applied even when the growth
    loop did not run). Returns ``(scale, curve)`` for the final amplitude.
    """
    scale = 10
    current = gaussian(wavenumbers, mu, sigma, scale)
    for delta in 10**np.arange(10, -1, -1):
        while not np.any((current - x) > 0):
            scale += delta
            current = gaussian(wavenumbers, mu, sigma, scale)
        scale -= delta
        current = gaussian(wavenumbers, mu, sigma, scale)
    return scale, current


def init_fit_gaussian(min_wavenumber=-1000, left_steps=10, min_sigma=100, max_sigma=1500, steps=20):
    """Build a function that approximates a spectrum from below with Gaussians.

    Parameters
    ----------
    min_wavenumber : float
        Negated and used as the start of ``left_steps`` extra candidate centres that
        run to 0.
    left_steps : int
        Number of those extra candidate centres (0 disables them).
    min_sigma, max_sigma : float
        Range of widths tried in the coarse search (spaced evenly in sqrt(sigma)).
    steps : int
        Number of centres across the module-level ``wavenumbers`` range and number of
        widths tried in both the coarse and the refinement search.

    Returns
    -------
    callable
        ``fit_gaussian(x)`` returning, per sample, the maximum over the best Gaussian
        found for every candidate centre; an all-zero array if no centre yields a fit.
    """
    def fit_gaussian(x):
        # NOTE(review): with the default min_wavenumber=-1000 the first linspace runs
        # from +1000 down to 0, so the `mu < 0` guards below never trigger. The sign
        # looks inverted, but it is kept as-is because changing it alters the fits.
        centres = np.concatenate((np.linspace(-min_wavenumber, 0, left_steps),
                                  np.linspace(wavenumbers[0], wavenumbers[-1], steps)))
        coarse_sigmas = np.linspace(min_sigma**0.5, max_sigma**0.5, steps)**2

        lst = []
        for mu in centres:
            # (mu, sigma, scale) of the best candidate so far; sigma=1 marks "no fit".
            temp_mu = 0, 1, 1
            temp_intergral = 0

            # Coarse search over the full width range.
            for sigma in coarse_sigmas:
                if mu < 0 and mu + 3*sigma < 0:
                    continue
                scale, current = _fit_scale_below(x, mu, sigma)
                intergral = np.sum(current)
                # Keep the candidate covering the largest area.
                if intergral > temp_intergral:
                    temp_intergral = intergral
                    temp_mu = mu, sigma, scale

            if temp_mu[1] < min_sigma: #no fit found
                continue

            # Refinement: widths within +-50 of the coarse optimum. The grid is fixed
            # before the loop, so updates to temp_mu do not shift it.
            refine_sigmas = np.linspace(max(min_sigma, temp_mu[1]-50), temp_mu[1]+50, steps)
            for sigma in refine_sigmas:
                if mu < 0 and mu + 3*sigma < 0:
                    continue
                scale, current = _fit_scale_below(x, mu, sigma)
                intergral = np.sum(current)
                if intergral > temp_intergral:
                    temp_intergral = intergral
                    temp_mu = mu, sigma, scale

            lst.append(gaussian(wavenumbers, *temp_mu))

        if not lst:
            # np.max over an empty stack raises; no fit at all means a zero estimate.
            return np.zeros(np.shape(wavenumbers))
        lst = np.array(lst)
        return np.max(lst, axis=0)
    return fit_gaussian
    
def preliminary_photo_approximation(x):
    """Return a first estimate of the broad background of spectrum ``x``.

    Two Gaussian fits are applied in sequence: the first uses the default
    ``init_fit_gaussian`` settings, the second uses ``left_steps=0`` and is fitted to
    what remains after subtracting the first fit. The sum of both fits is returned.
    ``x`` itself is not modified; the subtraction happens on a copy.
    """
    fitters = (init_fit_gaussian(), init_fit_gaussian(left_steps=0))

    residual = copy.copy(x)
    components = []
    for fitter in fitters:
        component = fitter(residual)
        residual -= component
        components.append(component)
    return np.sum(np.array(components), axis=0)


In [11]:
def zero_bound_to_None(bound):
    """Turn a distance-from-the-end into a slice stop index.

    Returns ``-bound`` for a non-zero ``bound`` and ``None`` for zero, because a stop
    index of ``-0`` would produce an empty slice instead of "up to the end".
    """
    if bound == 0:
        return None
    return -bound

def init_smooth_grad(general_noise=10e6, continue_gap=100):
    """Build a function that flattens local bumps in the gradient of a curve.

    Parameters
    ----------
    general_noise : float
        Smoothing factor ``s`` of the cubic spline used to obtain the "general"
        (heavily smoothed) version of the curve.
    continue_gap : int
        A local gradient maximum closer than ``continue_gap/2`` samples to a maximum
        of the general gradient is left untouched; it also caps the width of the
        Gaussian smoothing applied around a flattened region.

    Returns
    -------
    callable
        ``smooth_grad(poly2)``, see its docstring.
    """
    def smooth_grad(poly2):
        """Return ``poly2`` with selected local gradient maxima flattened.

        ``poly2`` is a 1-D curve sampled on the module-level ``wavenumbers``. The
        function works on its first differences, rewrites parts of them and rebuilds
        the curve by cumulative summation starting from ``poly2[0]``.
        """
        # Heavily smoothed version of the curve and its first differences.
        general_grad_func = interpolate.UnivariateSpline(wavenumbers, 
                                                         poly2,
                                                         k=3, s=general_noise)
        general_grad = general_grad_func(wavenumbers)
        grad_general = general_grad[1:] - general_grad[:-1]

        # First and second differences of the input curve itself.
        grad = poly2[1:] - poly2[:-1]        
        grad2 = np.pad(poly2[2:] - 2 * poly2[1:-1] + poly2[:-2],(1, 0), 'edge')

        # Indices of local extrema used to delimit each bump.
        general_max_lst = signal.argrelmax(grad_general)[0]
        max_lst = signal.argrelmax(grad)[0]
        max_grad_lst = signal.argrelmax(grad2)[0]
        min_grad_lst = signal.argrelmin(grad2)[0]

        for max_ in max_lst:
            # Skip maxima that coincide (within continue_gap/2) with a maximum of the
            # general gradient.
            # NOTE(review): np.argmin raises ValueError if general_max_lst is empty.
            general_max = general_max_lst[np.argmin([abs(x - max_) for x in general_max_lst])]
            if abs(general_max - max_) < continue_gap/2:
                continue

            # left: last second-difference maximum before max_;
            # right: first second-difference minimum after max_.
            # If either does not exist the maximum is skipped.
            try:
                *_, left = filter(lambda x: x < max_, max_grad_lst)
                right = next(filter(lambda x: x > max_, min_grad_lst))
            except (ValueError, StopIteration) as error:
                continue

            # Neighbouring gradient maxima (or the array ends when there is none).
            try:
                *_, left_max = filter(lambda x: x < max_, max_lst)
            except ValueError:
                left_max = 0

            try:
                right_max = next(filter(lambda x: x > max_, max_lst))
            except StopIteration:
                right_max = grad.shape[0]          

            # Flatten the gradient between `left` and `max_` to the value at `left`.
            old_sum = sum(grad)
            restore_grad = copy.copy(grad)
            grad[left:max_] = grad[left]
            new_sum = sum(grad)
            i = max_

            # Maximum distance the flattening may extend to the right of max_.
            dist = 3 * min(max_ - left, right - max_)
            dist = min(dist, max_ - left_max, right_max - max_)

            while True:
                # Extend the flat region to the right in chunks of 10 samples while
                # the total gradient is still below its original sum.
                while new_sum < old_sum:
                    grad[i:i+10] = grad[left]
                    new_sum = sum(grad)
                    i += 10
                    if i > min(max_ + dist, grad.shape[0]):
                        # Went too far: move `left` 10 samples to the right, undo all
                        # changes and start over from max_.
                        left = min(left+10, grad.shape[0]-1) #to prevent left going out of bounds
                        grad = copy.copy(restore_grad)
                        grad[left:max_] = grad[left]
                        new_sum = sum(grad)
                        i = max_
                        break
                else:
                    # Inner loop ended without a restart: the flat region is final.
                    break

            # Spread the remaining difference evenly over [left, i) so that the sum
            # of the gradient equals its original value again.
            # NOTE(review): left == i makes the denominator zero.
            flat_residu = (old_sum - new_sum) / (left-i)
            grad[left:i] -= flat_residu

            # if flatting of the maximum gradient is not possible skip
            if left >= max_:
                continue

            # Widths used for Gaussian smoothing of the flat region and of both edges.
            smooth_dist = max(2, min(i-left, continue_gap))
            sigma = smooth_dist/4
            smooth_dist2 = smooth_dist//2
            smooth_dist2b = smooth_dist - smooth_dist2
            smooth_dist3 = smooth_dist + smooth_dist2
            # In each branch three windows are filtered (whole region, around `left`,
            # around `i`); a wider slice is filtered and only its inner part written
            # back. The first two branches clip the windows at the array boundaries.
            if left-smooth_dist3 < 0: # correct for maximums at the left part of the spectrum
                left_bound, gaus_left_bound = max(0, left-smooth_dist2), max(0, left-smooth_dist)
                grad[left_bound:i+smooth_dist2] = ndimage.gaussian_filter(grad[gaus_left_bound:i+smooth_dist], sigma, mode='nearest')[left_bound - gaus_left_bound:-smooth_dist2b]
                left_bound = max(0, left-smooth_dist)
                grad[left_bound: left+smooth_dist] = ndimage.gaussian_filter(grad[:left+smooth_dist3], sigma, mode='nearest')[left_bound:-smooth_dist2]
                left_bound, gaus_left_bound = max(0, i-smooth_dist), max(0, i-smooth_dist3)
                grad[left_bound:i+smooth_dist] = ndimage.gaussian_filter(grad[gaus_left_bound:i+smooth_dist3], sigma, mode='nearest')[left_bound - gaus_left_bound:-smooth_dist2]
            elif i + smooth_dist3 > grad.shape[0]: # correct for maximums at the right part of the spectrum
                # Right bounds are distances from the end of the array; they are
                # converted to negative stop indices (None for 0) before slicing.
                right_bound, gaus_right_bound = max(grad.shape[0] - (i+smooth_dist2), 0), max(grad.shape[0] - (i+smooth_dist), 0)
                right_diff = right_bound - gaus_right_bound
                right_bound, gaus_right_bound, right_diff = zero_bound_to_None(right_bound), zero_bound_to_None(gaus_right_bound), zero_bound_to_None(right_diff)
                grad[left-smooth_dist2:right_bound] = ndimage.gaussian_filter(grad[left-smooth_dist:gaus_right_bound], sigma, mode='nearest')[smooth_dist2b:right_diff]
                right_bound, gaus_right_bound = max(grad.shape[0] - (left+smooth_dist), 0), max(grad.shape[0] - (left+smooth_dist3), 0)
                right_diff = right_bound - gaus_right_bound
                right_bound, gaus_right_bound, right_diff = zero_bound_to_None(right_bound), zero_bound_to_None(gaus_right_bound), zero_bound_to_None(right_diff)
                grad[left-smooth_dist:right_bound] = ndimage.gaussian_filter(grad[left-smooth_dist3:min(left+smooth_dist3, grad.shape[0])], sigma, mode='nearest')[smooth_dist2:right_diff]
                right_bound = max(grad.shape[0] - (i+smooth_dist), 0)
                right_bound = zero_bound_to_None(right_bound)
                grad[i-smooth_dist:right_bound] = ndimage.gaussian_filter(grad[i-smooth_dist3:], sigma, mode='nearest')[smooth_dist2:right_bound]
            else:
                grad[left-smooth_dist2:i+smooth_dist2] = ndimage.gaussian_filter(grad[left-smooth_dist:i+smooth_dist], sigma, mode='nearest')[smooth_dist2b:-smooth_dist2b]            
                grad[left-smooth_dist:left+smooth_dist] = ndimage.gaussian_filter(grad[left-smooth_dist3:left+smooth_dist3], sigma, mode='nearest')[smooth_dist2:-smooth_dist2]
                grad[i-smooth_dist:i+smooth_dist] = ndimage.gaussian_filter(grad[i-smooth_dist3:i+smooth_dist3], sigma, mode='nearest')[smooth_dist2:-smooth_dist2]

            # Smoothing changed the total again; spread the difference evenly over the
            # smoothed region to restore the original sum of the gradient.
            new_sum = sum(grad)
            left_bound, right_bound = max(0,left-smooth_dist), min(i+smooth_dist, grad.shape[0])
            flat_residu = (old_sum - new_sum) / (left_bound - right_bound)
            grad[left_bound: right_bound] -= flat_residu

        # Rebuild the curve from the modified gradient, starting at the first sample.
        value = poly2[0]
        poly2 = [value]
        for g in grad:
            value += g
            poly2.append(value)
            
        return np.array(poly2)
    return smooth_grad

# Module-level instance with default settings; it is called by later cells.
smooth_grad = init_smooth_grad()


In [12]:
def init_poly_approximation(general_noise=50e6, continue_gap=100, intervals=10, precision=12, poly_fit=1e5):
    """Build a function that refines a background estimate lying below a spectrum.

    Parameters
    ----------
    general_noise : float
        Smoothing factor ``s`` of the quartic spline fitted to the previous estimate.
    continue_gap : int
        Window size of the minimum/maximum filters applied to the boolean masks; also
        sets (as ``continue_gap//5``) the dilation of the "spline above data" mask.
    intervals : int
        The spectrum is cut into segments of ``len(x)//intervals`` samples.
    precision : int
        Number of different segment offsets tried per segment length.
    poly_fit : float
        Smoothing factor ``s`` of the quartic spline fitted to the combined envelope.

    Returns
    -------
    callable
        ``poly_approximation(x, general)`` where ``x`` is the measured spectrum and
        ``general`` the previous background estimate, both sampled on the
        module-level ``wavenumbers``. Returns the new estimate after it has been
        passed through the module-level ``smooth_grad``.
    """
    def poly_approximation(x, general):
        general_grad_func = interpolate.UnivariateSpline(wavenumbers, 
                                                         general,
                                                         k=4, s=general_noise)
        general_grad = general_grad_func(wavenumbers)
        # Second difference of the smoothed estimate and first difference of the raw one.
        sec_grad2 = np.pad(general_grad[2:] - 2*general_grad[1:-1] + general_grad[:-2], (1, 1), 'edge')
        grad = np.pad(general[1:] - general[:-1], (0,1), 'edge')
        grad_m = np.mean(np.abs(grad))
        small_grad = np.abs(grad) < grad_m
        sec_grad_pos = (sec_grad2 < 0)

        # Minimum filter followed by maximum filter on each mask.
        small_grad = ndimage.minimum_filter(small_grad, size=continue_gap)
        sec_grad_pos = ndimage.minimum_filter(sec_grad_pos, size=continue_gap)
        small_grad = ndimage.maximum_filter(small_grad, size=continue_gap)
        sec_grad_pos = ndimage.maximum_filter(sec_grad_pos, size=continue_gap)
        logic = sec_grad_pos * small_grad

        # Soft (0..1) weight derived from the boolean mask.
        sec_grad_sign = ndimage.gaussian_filter(logic.astype(np.float32), 50)       

        minimums = []
        # Guard against a zero segment length or zero offset stride: the original
        # `range(0, step, step//precision)` raised ValueError when step < precision.
        step = max(1, x.shape[0]//intervals)
        offset_stride = max(1, step//precision)
        for j in range(0, step, offset_stride):
            x_ = copy.copy(x)
            # to adress exponential values at the left side of the spectrum, make these value higher
            if j > 0:
                # Leading partial segment [0, j): a straight line with the height
                # change of `general`, shifted so it touches x where x - line is minimal.
                new_step = x_[:j].shape[0]
                slope = general[:j]
                height = slope[-1] - slope[0]
                slope = np.linspace(0, height, new_step)
                index = np.argmin(x_[:j] - slope)
                slope -= slope[index]
                slope += x[index]
                x_[:j] = slope

            # Same construction for every full segment starting at offset j.
            for i in range(j, x.shape[0]-1, step):
                step_size = step
                new_step = x_[i:i+step_size].shape[0]
                smooth_curve = general[i:i+new_step]
                height = smooth_curve[-1] - smooth_curve[0]
                slope = np.linspace(0, height, new_step)
                index = np.argmin(x_[i:i+new_step] - slope)
                slope -= slope[index]
                slope += x[i+index]
                x_[i:i+new_step] = slope
            minimums.append(x_)

        # Blend the upper and lower envelope over all offsets with the soft mask.
        poly_max = np.max(np.array(minimums), axis=0)
        poly_min = np.min(np.array(minimums), axis=0)
        poly = sec_grad_sign * poly_min + (1-sec_grad_sign) * poly_max

        # At most 5 rounds of: fit a spline, then lower `poly` where the spline is
        # above the data. old_poly starts as poly-1, so the first round always runs.
        old_poly = poly-1
        j = 0
        while sum(poly-old_poly) and j < 5:
            j += 1
            func = interpolate.UnivariateSpline(wavenumbers, 
                                                poly,
                                                k=4, s=poly_fit)
            poly2 = func(wavenumbers)

            old_poly = copy.copy(poly)
            # This pushes the poly graph down, acting like a weight for the spline to
            # fit the graph below x.
            problem_part = ndimage.maximum_filter(ndimage.minimum_filter(poly2 > x, 3), continue_gap//5)
            poly -= problem_part * 5
        # NOTE: uses the module-level smooth_grad instance, not this factory's arguments.
        poly2 = smooth_grad(poly2)

        return poly2
    return poly_approximation
poly_approximation = init_poly_approximation()

In [19]:
def split_signal(img, n_refinements=3):
    """Split every spectrum of an image into a Raman part and a background part.

    Parameters
    ----------
    img : tuple
        ``(data, label)``; only ``img[0]`` is used. It is flattened to
        ``(-1, img[0].shape[-1])`` so that each row is one spectrum.
    n_refinements : int
        Number of successive ``poly_approximation`` passes per spectrum.

    Returns
    -------
    (raman, photo) : tuple of arrays
        Both have the flattened shape; ``photo`` holds the background estimate and
        ``raman`` the denoised remainder.

    Note from the original author: artefact due to using previous points.
    """
    x = img[0].reshape(-1, img[0].shape[-1])

    raman = np.empty(x.shape)
    photo = np.empty(x.shape)
    for pixel in range(x.shape[0]):
#         if not pixel % 10:
        print(f"progress = {100* pixel / x.shape[0]}%")
        # poly approximation: Gaussian-based first guess, then iterative refinement
        poly = preliminary_photo_approximation(x[pixel])
        poly2 = smooth_grad(poly)
        for _ in range(n_refinements):
            poly2 = poly_approximation(x[pixel], poly2)

        # raman approximation
        raman2 = remove_noise_fft(x[pixel]-poly2)

        # remove_noise_fft returns the complex ifft output; take the real part
        # explicitly instead of relying on an implicit complex->float cast, which
        # emits a ComplexWarning for every pixel.
        raman[pixel] = np.real(raman2)
        photo[pixel] = poly2

    return raman, photo

In [20]:
import timeit
from os import path

# Results are written next to the input directory, in "approximated_curve_fit/".
file_location2 = f"{'/'.join(file_location.split('/')[:-2])}/approximated_curve_fit/"
os.makedirs(file_location2, exist_ok=True)

# Kept for compatibility with other cells; the loop below uses each image's own shape.
shape = data[0][0].shape

for j, (f, img) in enumerate(zip(filenames, data)):   
    start = timeit.default_timer()
    # Output path without suffix: file name without directories and extension.
    out_stem = f'{file_location2}{f.split("/")[-1].split(".")[0]}'
    # Skip images whose Raman result is already stored on disk.
    if path.exists(f'{out_stem}_raman.npy'):
        print(f'file already exist {out_stem}_raman')
        continue
    
    raman, photo = split_signal(img)

    stop = timeit.default_timer()
    print('Time: ', stop - start)  

    # Restore this image's own shape; using the first image's shape for every image
    # fails (or mis-shapes the output) when images differ in size.
    img_shape = img[0].shape
    np.save(f'{out_stem}_raman', raman.reshape(img_shape))
    np.save(f'{out_stem}_photoluminescence', photo.reshape(img_shape))
    print(f"image: {f} is done.")
    # NOTE(review): this break stops after the first newly processed image;
    # remove it to process every file in one run.
    break

progress = 0.0%
progress = 0.02666666666666667%
progress = 0.05333333333333334%
progress = 0.08%
progress = 0.10666666666666667%
progress = 0.13333333333333333%
progress = 0.16%
progress = 0.18666666666666668%
progress = 0.21333333333333335%
progress = 0.24%
progress = 0.26666666666666666%
progress = 0.29333333333333333%
progress = 0.32%
progress = 0.3466666666666667%
progress = 0.37333333333333335%
progress = 0.4%
progress = 0.4266666666666667%
progress = 0.4533333333333333%
progress = 0.48%
progress = 0.5066666666666667%
progress = 0.5333333333333333%
progress = 0.56%
progress = 0.5866666666666667%
progress = 0.6133333333333333%
progress = 0.64%
progress = 0.6666666666666666%


KeyboardInterrupt: 