In [3]:
import numpy as np
import copy
import pickle
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter
from collections import defaultdict
import os 
from scipy.optimize import curve_fit
import scipy.signal as signal
from signal_processing import CosmicrayCorrection

# Default figure geometry for every plot in this notebook: 20x10 inch at 500 dpi.
plt.rcParams.update({
    'figure.figsize': (20.0, 10.0),
    'figure.dpi': 500,
})

# Inject CSS that widens the notebook's `.container` element to 98% of the page.
from IPython.display import display, HTML
display(
    HTML("<style>.container { width:98% !important; }</style>")
)

In [10]:
# Input directory; every entry of FileNames.npy names an array file stored inside it.
file_location = "../data/Raman_Mouse/CRN_saturation_removed/"
# file_location = "../data/Green_excitation/corrected_4_wavenumbers/"

# Use the stored file list when it exists, otherwise fall back to a single hard-coded file.
try:
    filenames = np.load(f"{file_location}FileNames.npy")
except FileNotFoundError:
    filenames = ['ML-MoS2-532-LP10-2-3-50X.npy']

# Load each listed file and combine the results into one array (first axis = file index).
data = np.array([np.load(f"{file_location}{name}") for name in filenames])
print(data.shape)

(51, 150, 25, 1253)


#### finding maxima https://en.wikipedia.org/wiki/Topographic_prominence

In the Raman mouse dataset, at least 96 pixels have an anomaly.

In [11]:
# Output directory: a "cosmic_ray_removed" folder next to the input directory.
# os.path is used instead of splitting on '/' so the result no longer depends on
# `file_location` ending with a slash. The split-based version dropped one directory
# level too many without the trailing slash, and for a single-component path such as
# "data/" it produced "/cosmic_ray_removed/" at the filesystem root.
# The trailing "" makes join() end the path with a separator, because output file
# names are built by plain string concatenation onto `file_location2`.
file_location2 = os.path.join(
    os.path.dirname(os.path.normpath(file_location)), "cosmic_ray_removed", ""
)
os.makedirs(file_location2, exist_ok=True)

# Carry the Wavenumbers array and the file list over to the output directory.
wavenumbers = np.load(f"{file_location}Wavenumbers.npy")
np.save(f'{file_location2}Wavenumbers.npy', wavenumbers)
# np.save appends the ".npy" extension itself, so this writes FileNames.npy.
np.save(f'{file_location2}FileNames', filenames)



In [12]:
# def find_cosmic_ray_noise(img):
#     """
#     find cosmic ray noise based on the wavenumbers within one pixel (a large difference of intensities)
#     TODO: 
#     """    
#     smooth = gaussian_filter(img, sigma=(0,0,1), order=0, mode='nearest')
#     diff = img-smooth
#     grad = gaussian_filter(diff, sigma=(0,0,0.5), order=1, mode='nearest')

#     # find the elbow in the gradient of the data
#     data = np.sort(np.abs(grad.flatten()))
#     sec = gaussian_filter(data[1:] - data[:-1], sigma=3)
#     m_sec, s_std = np.mean(sec), np.std(sec)
#     threshold = data[np.max(np.where(sec < m_sec + 0.5*s_std))]   
    
#     # build a dict that maps each pixel (x, y) to the indices of its problem areas.
#     tmp = defaultdict(list)
#     for x,y,z in zip(*np.where(grad > threshold)):
#         tmp[(x,y)].append(z)
        
#     return tmp

# def find_cosmic_ray_noise_neighbourhood(img):
#     """
#     find cosmic ray noise based also on the neighbourhood (a large difference of intensities)
#     This returns false positives if in the neighbourhood there is a large spike but not in the pixel it self.
#     """
#     smooth = gaussian_filter(img, sigma=(1,1,1), order=0, mode='nearest')
#     diff = img-smooth
#     grad = gaussian_filter(diff, sigma=(0,0,0.5), order=1, mode='nearest')
    
#     # find the elbow in the data
#     data = np.sort(np.abs(grad.flatten()))
#     sec = gaussian_filter(data[1:] - data[:-1], sigma=3)
#     m_sec, s_std = np.mean(sec), np.std(sec)
#     threshold = data[np.max(np.where(sec < m_sec + 0.5*s_std))]    
    
#     # build a dict that maps each pixel (x, y) to the indices of its problem areas.
#     tmp = defaultdict(list)
#     for x,y,z in zip(*np.where(grad > threshold)):
#         tmp[(x,y)].append(z)
        
#     return tmp

# def find_region(lst):
#     lst = sorted(lst)
#     start = []
#     stop = []
#     index = lst[0]
#     start.append(index)
#     for i in lst[1:]:
#         if i-index < 5:
#             index = i
#         else:
#             stop.append(index)
#             index = i
#             start.append(index)
#     stop.append(index)
#     return list(zip(start, stop))

# def gaussian(base):
#     def tmp(x, *params):
#         mu = np.array(params[slice(0,len(params),3)])
#         scale = np.array(params[slice(1,len(params),3)])
#         sigma = np.array(params[slice(2,len(params),3)])
#         return np.sum(scale * np.exp(-0.5*((x.reshape(-1,1) - mu)/sigma)**2),1) + base
#     return tmp


# def correcting_4_cosmic_ray_noies(img, cosmic_ray_noise, func = gaussian):
#     img2 = copy.copy(img)
    
#     # find cosmic ray noise indices for each pixel
#     new_cosmic_ray_noise = defaultdict(list)
#     for (x,y), lst in cosmic_ray_noise.items():
#         # collect all the indices and turn them into separate windows/ranges with appropriate spacing such that
#         # interpolation can be used to estimate the "true" value of the affected pixel/wavenumber combination
#         for Range in find_region(lst):
#             # determine the range of the region
#             size = Range[1]-Range[0]
#             padding = int(4 + (size)/2)
#             X = np.arange(max(0,Range[0]-padding), min(img2.shape[2], Range[1]+padding+1), dtype=int)

#             # fit a Gaussian curve to check for incorrectly classified cosmic ray noise
#             mu, base = X[np.argmax(img2[x,y,X[0]:X[-1]+1])], np.min(img2[x,y,X[0]:X[-1]+1])
#             sigma, scale = size/2 if size != 0 else 1, max(1, img2[x,y][mu])
#             base_adjusted_func = func(base)
#             try:
#                 popt, pcov = curve_fit(base_adjusted_func, X, img2[x,y,X[0]:X[-1]+1], p0=[mu, scale, sigma])
#             except RuntimeError:
#                 # definitely not a Gaussian
#                 img2[x,y,X[0]:X[-1]+1] = np.interp(X, [X[0], X[-1]], [img[x,y,X[0]], img[x,y,X[-1]]])
#                 new_cosmic_ray_noise[(x,y)].append((Range[1] + Range[0]) / 2)
#                 continue
                
#             fit = base_adjusted_func(X, *popt)
#             mu_fit, scale_fit, sigma_fit = popt
#             HM = scale_fit / 2
            
#             HW = sigma_fit * np.sqrt(-2*np.log(HM/scale_fit))
#             left, right, appr_left = mu_fit - HW, mu_fit + HW, max(0,min(len(wavenumbers)-1, int(mu_fit - HW)))
#             NRMSE = np.sqrt(np.mean((fit - img2[x,y,X[0]:X[-1]+1])**2))/scale_fit
#             FWHM = wavenumbers[min(len(wavenumbers)-1,appr_left + int(HW*2))] - wavenumbers[appr_left]
            
#             # if the NRMSE is below 0.1 and the full width (FW) is larger than 5, the found spike is Raman.
#             if NRMSE < 0.1 and FWHM > 5:
#                 if PLOT_ERRORS:
#                     print("------------ WRONG ---------------")
#                     print('MSE:', NRMSE, popt, [mu, scale, sigma], ',base:', base, ',HM:', HM, ',FWHM:', FWHM)
#                     plt.axhline(y=HM + base, color='g')
#                     plt.axvline(x=left, color='g')
#                     plt.axvline(x=mu_fit, color='y')
#                     plt.axvline(x=right, color='g')


#                     plt.plot(X, fit , 'r-', label='raman appr')
#                     plt.plot(X, img2[x,y,X[0]:X[-1]+1], label='raw')
#                     plt.legend()
#                     plt.show()

#                     for z in Range:
#                         plt.plot([z,z],[-1000,3000], alpha=0.1, color='k')
#                     plt.plot(img[x,y], alpha=0.4)
#                     plt.grid(True, which='both')
#                     plt.xlim([0,len(wavenumbers)])
#                     locs, _ = plt.xticks()
#                     plt.xticks(locs, [wavenumbers[int(i)] if i < len(wavenumbers) else "" for i in locs])
#                     plt.xlim([0,len(wavenumbers)])
#                     plt.show()
#                     print("------------ END WRONG ---------------")
#                 continue
                
#             print("REMOVED", x,y, Range, NRMSE, FWHM)
#             if PLOT_CORRECT:
#                 print("------------ CORRECT ---------------")
#                 print('MSE:', NRMSE, popt, [mu, scale, sigma], ',base:', base, ',HM:', HM, ',FWHM:', FWHM)
#                 plt.axhline(y=HM + base, color='g')
#                 plt.axvline(x=left, color='g')
#                 plt.axvline(x=mu_fit, color='y')
#                 plt.axvline(x=right, color='g')


#                 plt.plot(X, fit , 'r-', label='raman appr')
#                 plt.plot(X, img2[x,y,X[0]:X[-1]+1], label='raw')
#                 plt.legend()
#                 plt.show()

#                 for z in Range:
#                     plt.plot([z,z],[-1000,3000], alpha=0.3, color='k')
#                 plt.plot(img[x,y], alpha=0.8)
#                 plt.grid(True, which='both')
#                 plt.xlim([0,len(wavenumbers)])
#                 locs, _ = plt.xticks()
#                 plt.xticks(locs, [wavenumbers[int(i)] if i < len(wavenumbers) else "" for i in locs])
#                 plt.xlim([0,len(wavenumbers)])
#                 plt.show()
#                 print("------------ END CORRECT ---------------")
                
#             img2[x,y,X[0]:X[-1]+1] = np.interp(X, [X[0], X[-1]], 
#                                                     [img[x,y,X[0]],
#                                                      img[x,y,X[-1]]])  
#             new_cosmic_ray_noise[(x,y)].append((Range[1] + Range[0]) / 2)
#     return img2, new_cosmic_ray_noise

In [13]:
import timeit

# Build the correction callable once; it is applied to every image below.
cosmicray_removal = CosmicrayCorrection.remove_cosmicrays(wavenumbers)

for i, img in enumerate(data):
    print(filenames[i])

    # Time only the correction itself, not the file write.
    start = timeit.default_timer()
    img, _ = cosmicray_removal(img)  # second return value is not used here
    stop = timeit.default_timer()
    print('Time: ', stop - start)

    # Save under the input file's base name with only its final extension removed;
    # np.save re-appends ".npy". splitext is used instead of split(".")[0] so that a
    # name containing extra dots (e.g. "a.b.npy") is not truncated to "a" and still
    # matches the file list written to the output directory.
    np.save(
        f'{file_location2}{os.path.splitext(os.path.basename(filenames[i]))[0]}',
        img,
    )


Liver_map_150z25_60s_#12.npy
Time:  0.48119654400011314
Muscle_map_150z25_60s_#34.npy
Time:  0.5395324569999502
Kidney_map_150z25_60s_#45.npy
Time:  0.5809473589999925
Kidney_map_150z25_60s_#43.npy
Time:  0.551478563000046
Liver_map_150x25_60s_#13.npy
Time:  0.4927398140000605
Muscle_map_150z25_60s_#27.npy
Time:  0.5047771499998817
Kidney_map_150z25_60s_#39.npy
Time:  0.5130912570000419
Liver_map_150z25_60s_#2.npy
Time:  0.47932473300011225
Muscle_map_150z25_60s_#19.npy
Time:  0.5091548460000013
Liver_map_150z25_60s_#01.npy
Time:  0.5328276410000399
Liver_map_150z25_60s_#03.npy
Time:  0.48842588200000137
Muscle_map_150z25_60s_#25.npy
Time:  0.5116319989999738
Kidney_map_150z25_60s_#36.npy
Time:  0.550126124000144
Liver_map_150x25_60s_#9.npy
Time:  0.4758452660000785
Liver_map_150z25_60s_#8.npy
Time:  0.5365506309999546
Muscle_map_150z25_60s_#20.npy
Time:  0.5155102950000128
Kidney_map_150z25_60s_#47.npy
Time:  0.5239892150000287
Liver_map_150z25_60s_#15.npy
Time:  0.6064909599999737
Mu