In [None]:
import time
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.cm as cm
import ipywidgets as pyw
import hyperspy.api as hys
import tkinter.filedialog as tkf
from tabulate import tabulate
from sklearn.decomposition import NMF
import malspy
plt.rcParams['font.family'] = 'Times New Roman'

In [None]:
# Build a discrete colormap with one flat color per integer class.
color_rep = [
    "black", "red", "green", "blue",
    "orange", "purple", "yellow", "lime",
    "cyan", "magenta", "lightgray", "peru",
    "springgreen", "deepskyblue", "hotpink", "darkgray",
]
print(len(color_rep))

custom_cmap = mcolors.ListedColormap(color_rep)
# len(color_rep) + 1 integer edges starting at -1, i.e. one bin per color
bounds = np.arange(len(color_rep) + 1) - 1
norm = mcolors.BoundaryNorm(bounds, len(color_rep))
# A ScalarMappable with an empty array carries only the cmap/norm pair
# (presumably so a colorbar can be drawn without an image -- TODO confirm use).
sm = cm.ScalarMappable(norm=norm, cmap=custom_cmap)
sm.set_array([])

# names of sequential matplotlib colormaps
cm_rep = ["gray", "Reds", "Greens", "Blues", "Oranges", "Purples"]
print(len(cm_rep))

In [None]:
def data_load(adr, zlp_adr, zlp_normalize=None, crop=None):
    """
    Load spectrum images and normalize each one by its zero-loss peak (ZLP).

    Parameters
    ----------
    adr : sequence of str
        Paths of the spectrum images; each one is opened with ``hys.load``.
    zlp_adr : sequence of str
        Paths of the ZLP data sets; ``zlp_adr[i]`` normalizes ``adr[i]``.
    zlp_normalize : {"sum", "max"}
        Reduction applied to the ZLP data along axis 2. The result is the
        per-pixel divisor of the spectrum image.
    crop : sequence, optional
        ``crop[0]`` and ``crop[1]`` are used as start and stop of the
        ``isig`` slice on the signal axis. ``None`` keeps the full signal axis.

    Returns
    -------
    storage : list of numpy.ndarray
        The normalized (and cropped) data array of every spectrum image.
    shape : numpy.ndarray
        ``np.asarray`` of the shapes of the arrays in ``storage``.

    Raises
    ------
    ValueError
        If ``zlp_normalize`` is neither ``"sum"`` nor ``"max"``.
    """
    # Validate before touching any file, so a wrong option fails immediately
    # instead of returning None and breaking the caller's tuple unpacking.
    if zlp_normalize == "sum":
        zlp_reduce = np.sum
    elif zlp_normalize == "max":
        zlp_reduce = np.max
    else:
        raise ValueError(
            "You chose a wrong option: zlp_normalize must be 'sum' or 'max', got %r"
            % (zlp_normalize,)
        )

    storage = []
    shape = []
    for i, img_adr in enumerate(adr):
        temp = hys.load(img_adr)
        zlp = hys.load(zlp_adr[i])

        # axis=2 is reduced away; assumes the ZLP data is 3-D with the
        # spectral axis last -- TODO confirm for other file layouts
        zlp_tmp = zlp_reduce(zlp.data, axis=2)

        # NOTE(review): a pixel whose reduced ZLP is 0 produces inf/nan here
        temp.data = temp.data / zlp_tmp[:, :, np.newaxis]
        if crop is not None:
            temp = temp.isig[crop[0]:crop[1]]
        temp = temp.data
        print(temp.shape)

        shape.append(temp.shape)
        storage.append(temp)

    shape = np.asarray(shape)
    return storage, shape

In [None]:
def reshape_coeff(coeffs, new_shape):
    """
    Reshape a coefficient matrix to restore the original scanning shapes.

    The rows of ``coeffs`` are consumed in order: the first
    ``new_shape[0, 0] * new_shape[0, 1]`` rows belong to the first image, the
    next ``new_shape[1, 0] * new_shape[1, 1]`` rows to the second, and so on.

    Parameters
    ----------
    coeffs : numpy.ndarray
        2-D matrix with one row per pixel.
    new_shape : array-like
        One row per image; columns 0 and 1 give the image height and width.
        Further columns are ignored.

    Returns
    -------
    list of numpy.ndarray
        One array per image with shape ``(height, width, -1)``. The arrays
        are reshaped slices of ``coeffs`` and may share memory with it.
    """
    new_shape = np.asarray(new_shape)
    coeff_reshape = []
    # A running offset replaces repeated np.delete calls, which copied the
    # whole remaining matrix once per image.
    start = 0
    for i in range(len(new_shape)):
        n_rows = int(new_shape[i, 0])
        n_cols = int(new_shape[i, 1])
        stop = start + n_rows * n_cols
        temp = np.reshape(coeffs[start:stop, :], (n_rows, n_cols, -1))
        coeff_reshape.append(temp)
        start = stop

    return coeff_reshape

In [None]:
# Accumulator for the spectrum-image file paths. Re-running this cell
# clears the selection.
file_adr = []

In [None]:
# Open a file dialog and append the chosen paths to file_adr.
# Running this cell again appends to the existing selection rather than
# replacing it.
file_adr.extend(tkf.askopenfilenames())
print(len(file_adr))
print(*file_adr, sep="\n")

In [None]:
# Accumulator for the zero-loss-peak (ZLP) file paths. Re-running this cell
# clears the selection.
zlp_adr = []

In [None]:
# Open a file dialog and append the chosen ZLP paths to zlp_adr.
# data_load pairs zlp_adr[i] with file_adr[i], so the selection order here
# has to match the order of the spectrum images.
zlp_adr.extend(tkf.askopenfilenames())
print(len(zlp_adr))
print(*zlp_adr, sep="\n")

In [None]:
# Number of spectrum images = number of selected spectrum-image paths.
num_img = len(file_adr)
print(num_img)

In [None]:
# load spectrum images
# cr_range = [start, stop, step]: start/stop are handed to data_load as the
# crop window on the signal axis; step is used below as the channel width
# (assumed to equal the energy dispersion of the data -- TODO confirm).
cr_range = [0.5, 3.0, 0.01] # actual input
data_storage, data_shape = data_load(file_adr, zlp_adr, zlp_normalize="sum", crop=cr_range)
print(len(data_storage))
print(data_shape)

# Take the spectral depth from the cropped data rather than from
# np.arange(start, stop, step): with a float step the arange length can differ
# from the real channel count, and a wrong depth would break or scramble the
# later reshape(-1, depth).
depths = sorted({img.shape[-1] for img in data_storage})
if len(depths) != 1:
    raise ValueError(
        "the cropped spectrum images must share exactly one spectral depth, got %s" % depths
    )
depth = depths[0]

# energy axis: one value per channel, starting at the lower crop bound
e_range = cr_range[0] + cr_range[2] * np.arange(depth)
print(len(e_range))

In [None]:
# create the input dataset
# Each image is clipped to non-negative values (NMF needs a non-negative
# input) and flattened to (pixels, depth); the per-image blocks are stacked in
# file order. Concatenating the arrays directly avoids converting every value
# to a Python float via .tolist(). The float64 cast keeps the dtype the list
# round trip used to produce.
dataset_flat = np.concatenate(
    [data_storage[i].clip(min=0.0).reshape(-1, depth) for i in range(num_img)],
    axis=0,
).astype(np.float64, copy=False)
print(dataset_flat.shape)

In [None]:
# optional
# Scale every spectrum (row) by its own maximum. All-zero rows keep a divisor
# of 1 so they stay zero instead of turning into nan.
tmp = dataset_flat.max(axis=1)
tmp[tmp == 0] = 1.0
dataset_flat = dataset_flat / tmp[:, np.newaxis]
print(dataset_flat.shape)

In [None]:
# Shuffle the spectra before the decomposition. `ri` is kept because the
# coefficient matrices are put back into the original order with
# `coeffs[ri] = ...` further down.
total_num = len(dataset_flat)

# A fixed seed makes the permutation (and so the NMF input order) reproducible
# after Restart & Run All; change shuffle_seed to draw a different order.
shuffle_seed = 0
ri = np.random.RandomState(shuffle_seed).permutation(total_num)

dataset_input = dataset_flat[ri]

In [None]:
# Number of components passed as n_components to each NMF model in this
# notebook.
num_comp = 5

In [None]:
# NMF with automatic relevance determination and soft orthogonality penalty,
# as implemented in malspy: https://github.com/MotokiShiga/malspy
model_nmf_ardso = malspy.NMF_ARD_SO(
    n_components=num_comp,
    wo=0.1,
    reps=5,
    max_itr=100,
)
model_nmf_ardso.fit(dataset_input)
model_nmf_ardso.plot_spectra(figsize=(6, 3), normalize=False)

# coefficient matrix of the fit (one row per input spectrum, presumably one
# column per component -- TODO confirm against malspy)
ardso_coeffs = model_nmf_ardso.C_
print(ardso_coeffs.shape)

# move axis 1 of S_ to the front so that ardso_comp_vectors[k] is the k-th
# loading vector
ardso_comp_vectors = np.moveaxis(model_nmf_ardso.S_, 1, 0)
print(ardso_comp_vectors.shape)

In [None]:
# convert the coefficient matrix into coefficient maps
# Row k of the fit belongs to original spectrum ri[k]; writing through `ri`
# undoes the shuffle before the rows are cut back into images.
coeffs = np.zeros_like(ardso_coeffs)
coeffs[ri] = ardso_coeffs
coeffs_reshape = reshape_coeff(coeffs, data_shape)

In [None]:
# all ARD-SO loading vectors on one energy axis
fig, ax = plt.subplots(1, 1, figsize=(5, 5))

for i, loading_vector in enumerate(ardso_comp_vectors):
    ax.plot(
        e_range,
        loading_vector,
        "-",
        c=color_rep[i+1],  # color_rep[0] ("black") is not used for components
        label="loading vector %d"%(i+1),
    )

ax.grid()
ax.legend(fontsize="large")
ax.set_xlabel("eV", fontsize=10)
ax.tick_params(axis="x", labelsize=10)
ax.yaxis.set_visible(False)  # hide the y axis

fig.tight_layout()
plt.show()

In [None]:
# visualize the coefficient maps
# One figure per loading vector, one panel per spectrum image.
# - squeeze=False always returns a 2-D axes array, so a single image needs no
#   separate branch.
# - All panels of a figure share one color scale and the colorbar is added
#   once, after the panels. Creating the colorbar axes inside the image loop
#   stacked num_img axes on top of each other and matched only the last panel.
for i in range(len(ardso_comp_vectors)):
    maps = [coeffs_reshape[j][:, :, i] for j in range(num_img)]
    vmin = min(m.min() for m in maps)
    vmax = max(m.max() for m in maps)

    fig, ax = plt.subplots(1, num_img, figsize=(5*num_img, 5), squeeze=False)
    for j in range(num_img):
        tmp = ax[0, j].imshow(maps[j], cmap="viridis", vmin=vmin, vmax=vmax)
        ax[0, j].set_title("loading vector %d map"%(i+1), fontsize=10)
        ax[0, j].axis("off")
    fig.colorbar(tmp, cax=fig.add_axes([0.92, 0.15, 0.04, 0.7]))
    plt.show()

In [None]:
import tifffile

# tifffile renamed imsave to imwrite; use the new name when it exists and fall
# back to the old one on older releases.
_tiff_write = getattr(tifffile, "imwrite", None) or tifffile.imsave

# One multi-page TIFF per component, one page per spectrum image.
# The file name carries the method tag "ardso" so that the exports of the other
# NMF variants in this notebook no longer overwrite these files.
# NOTE(review): stacking assumes every image has the same scan shape.
for i in range(num_comp):
    tilt_series = np.asarray(
        [coeffs_reshape[j][:, :, i].astype(np.float32) for j in range(num_img)]
    )
    _tiff_write("tilt_series_latent_ardso_%02d.tif"%(i+1), tilt_series)

In [None]:
# NMF with soft orthogonality penalty, as implemented in malspy:
# https://github.com/MotokiShiga/malspy
model_nmf_so = malspy.NMF_SO(
    n_components=num_comp,
    wo=0.10,
    reps=5,
    max_itr=100,
)
model_nmf_so.fit(dataset_input)

# coefficient matrix of the fit (one row per input spectrum, presumably one
# column per component -- TODO confirm against malspy)
so_coeffs = model_nmf_so.C_
print(so_coeffs.shape)

# move axis 1 of S_ to the front so that so_comp_vectors[k] is the k-th
# loading vector
so_comp_vectors = np.moveaxis(model_nmf_so.S_, 1, 0)
print(so_comp_vectors.shape)

In [None]:
# convert the coefficient matrix into coefficient maps
# Row k of the fit belongs to original spectrum ri[k]; writing through `ri`
# undoes the shuffle before the rows are cut back into images.
coeffs = np.zeros_like(so_coeffs)
coeffs[ri] = so_coeffs
coeffs_reshape = reshape_coeff(coeffs, data_shape)

In [None]:
# all SO loading vectors on one energy axis
fig, ax = plt.subplots(1, 1, figsize=(5, 5))

for i in range(num_comp):
    ax.plot(
        e_range,
        so_comp_vectors[i],
        "-",
        c=color_rep[i+1],  # color_rep[0] ("black") is not used for components
        label="loading vector %d"%(i+1),
    )

ax.grid()
ax.legend(fontsize="large")
ax.set_xlabel("eV", fontsize=10)
ax.tick_params(axis="x", labelsize=10)
ax.yaxis.set_visible(False)  # hide the y axis

fig.tight_layout()
plt.show()

In [None]:
# visualize the coefficient maps
# One figure per loading vector, one panel per spectrum image.
# - squeeze=False always returns a 2-D axes array, so a single image needs no
#   separate branch.
# - All panels of a figure share one color scale and the colorbar is added
#   once, after the panels. Creating the colorbar axes inside the image loop
#   stacked num_img axes on top of each other and matched only the last panel.
for i in range(num_comp):
    maps = [coeffs_reshape[j][:, :, i] for j in range(num_img)]
    vmin = min(m.min() for m in maps)
    vmax = max(m.max() for m in maps)

    fig, ax = plt.subplots(1, num_img, figsize=(5*num_img, 5), squeeze=False)
    for j in range(num_img):
        tmp = ax[0, j].imshow(maps[j], cmap="viridis", vmin=vmin, vmax=vmax)
        ax[0, j].set_title("loading vector %d map"%(i+1), fontsize=10)
        ax[0, j].axis("off")
    fig.colorbar(tmp, cax=fig.add_axes([0.92, 0.15, 0.04, 0.7]))
    plt.show()

In [None]:
import tifffile

# tifffile renamed imsave to imwrite; use the new name when it exists and fall
# back to the old one on older releases.
_tiff_write = getattr(tifffile, "imwrite", None) or tifffile.imsave

# One multi-page TIFF per component, one page per spectrum image.
# The file name carries the method tag "so" so that the exports of the other
# NMF variants in this notebook do not overwrite each other.
# NOTE(review): stacking assumes every image has the same scan shape.
for i in range(num_comp):
    tilt_series = np.asarray(
        [coeffs_reshape[j][:, :, i].astype(np.float32) for j in range(num_img)]
    )
    _tiff_write("tilt_series_latent_so_%02d.tif"%(i+1), tilt_series)

In [None]:
# NMF decomposition (linear dimensionality reduction)
# please visit the below link for detailed information on NMF
# https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html?highlight=nmf#sklearn.decomposition.NMF
#
# The former `alpha=0.0` argument is dropped: the keyword was deprecated and
# later removed from scikit-learn, where it raises a TypeError. No
# regularization is the default in old and new releases alike, so the fit is
# unchanged.
skl_nmf = NMF(n_components=num_comp, init="nndsvda", solver="mu", max_iter=2000,
              verbose=True, beta_loss="frobenius", l1_ratio=0.0)

# skl_coeffs: one row per input spectrum; skl_comp_vectors: one row per
# loading vector
skl_coeffs = skl_nmf.fit_transform(dataset_input)
skl_comp_vectors = skl_nmf.components_
print(skl_coeffs.shape)
print(skl_comp_vectors.shape)

In [None]:
# convert the coefficient matrix into coefficient maps
# Row k of the fit belongs to original spectrum ri[k]; writing through `ri`
# undoes the shuffle before the rows are cut back into images.
coeffs = np.zeros_like(skl_coeffs)
coeffs[ri] = skl_coeffs
coeffs_reshape = reshape_coeff(coeffs, data_shape)

In [None]:
# visualize loading vectors
# left panel: every loading vector; right panel: a hand-picked subset

fig, ax = plt.subplots(1, 2, figsize=(10, 4))

for i in range(num_comp):
    ax[0].plot(e_range, skl_comp_vectors[i], "-", c=color_rep[i+1], label="loading vector %d"%(i+1))

sel_nmf_comp = [2, 3, 4, 5] # choose several loading vectors to visualize
# Component numbers are 1-based. Anything outside 1..num_comp is dropped: it
# would either raise an IndexError or (for 0) silently plot the last vector.
dropped = [k for k in sel_nmf_comp if not 1 <= k <= num_comp]
if dropped:
    print("ignoring loading vectors outside 1..%d: %s" % (num_comp, dropped))
sel_nmf_comp = [k for k in sel_nmf_comp if 1 <= k <= num_comp]
for i in sel_nmf_comp:
    ax[1].plot(e_range, skl_comp_vectors[i-1], "-", c=color_rep[i], label="loading vector %d"%(i))

# identical cosmetics for both panels
for panel in ax:
    panel.grid()
    panel.legend(fontsize="large")
    panel.set_xlabel("eV", fontsize=10)
    panel.tick_params(axis="x", labelsize=10)
    panel.axes.get_yaxis().set_visible(False)

fig.tight_layout()
plt.show()

In [None]:
# visualize the coefficient maps
# One figure per loading vector, one panel per spectrum image.
# - squeeze=False always returns a 2-D axes array, so a single image needs no
#   separate branch.
# - All panels of a figure share one color scale and the colorbar is added
#   once, after the panels. Creating the colorbar axes inside the image loop
#   stacked num_img axes on top of each other and matched only the last panel.
for i in range(num_comp):
    maps = [coeffs_reshape[j][:, :, i] for j in range(num_img)]
    vmin = min(m.min() for m in maps)
    vmax = max(m.max() for m in maps)

    fig, ax = plt.subplots(1, num_img, figsize=(5*num_img, 5), squeeze=False)
    for j in range(num_img):
        tmp = ax[0, j].imshow(maps[j], cmap="viridis", vmin=vmin, vmax=vmax)
        ax[0, j].set_title("loading vector %d map"%(i+1), fontsize=10)
        ax[0, j].axis("off")
    fig.colorbar(tmp, cax=fig.add_axes([0.92, 0.15, 0.04, 0.7]))
    plt.show()

In [None]:
import tifffile

# tifffile renamed imsave to imwrite; use the new name when it exists and fall
# back to the old one on older releases.
_tiff_write = getattr(tifffile, "imwrite", None) or tifffile.imsave

# One multi-page TIFF per component, one page per spectrum image.
# The file name carries the method tag "sklearn" so that this export does not
# overwrite the files written for the other NMF variants in this notebook.
# NOTE(review): stacking assumes every image has the same scan shape.
for i in range(num_comp):
    tilt_series = np.asarray(
        [coeffs_reshape[j][:, :, i].astype(np.float32) for j in range(num_img)]
    )
    _tiff_write("tilt_series_latent_sklearn_%02d.tif"%(i+1), tilt_series)