In [None]:
import warnings
# Silence every warning raised from here on. This keeps cell output uncluttered,
# but it also hides deprecation and numerical warnings from the libraries used below.
warnings.filterwarnings("ignore")

In [None]:
!pip install numpy matplotlib spectrochempy

In [None]:
# Render matplotlib figures inline, below the cell that creates them.
%matplotlib inline

### Import the spectrochempy API package



In [None]:
import os

import numpy as np
import pandas as pd
import spectrochempy as scp

In [None]:
# Input/output locations. Both keep their trailing slash because the cells below
# build file paths by plain string concatenation (f"{DATA_DIR}<file name>").
DATA_DIR = './data/'
OUTPUT_DIR = './output/'

# Make sure the output folder exists so the CSV exports further down do not fail
# with FileNotFoundError on a fresh checkout.
os.makedirs(OUTPUT_DIR, exist_ok=True)

### Load the dataset



In [None]:
# Read the spectra table and keep only the rows whose Wavenumber lies in the
# closed interval [500, 1800]. Negative intensities are left untouched.
spectra_csv = f"{DATA_DIR}corrected_spectra_denoised_minmax.csv"
spectra_all = pd.read_csv(spectra_csv)
wavenumber = spectra_all["Wavenumber"]
in_window = (wavenumber >= 500) & (wavenumber <= 1800)
datasets = spectra_all[in_window]
datasets.head()

In [None]:
# (rows, columns) of the table after cropping to the wavenumber window.
datasets.shape

In [None]:
# Column labels; the first column is dropped in the next cell.
datasets.columns

In [None]:
# Keep every column except the first one, leaving only the data columns.
D = datasets.iloc[:, 1:]
D.head()

In [None]:
# Wrap the intensities in a spectrochempy NDDataset.
# The rows of D follow the Wavenumber axis of the source table, so the matrix is
# transposed to give the NDDataset one row per column of D.
spectra_matrix = D.to_numpy().T
X = scp.NDDataset(spectra_matrix, name="D")
X

In [None]:
# Shape of the transposed dataset that is passed to PCA and MCR-ALS below.
X.shape

## Use of PCA to assess the number of pure species
Let’s first analyse our dataset using PCA and plot a screeplot:

In [None]:
# Fit a 4-component PCA on X and draw its scree plot to help judge how many
# components are needed to describe the data.
pca = scp.PCA(n_components=4) # number of components to keep; the upper bound is min(rows, cols) of X
pca.fit(X)
# pca.printev()
_ = pca.screeplot()

# transform() is called without arguments — presumably it projects the data given
# to fit(); confirm against the spectrochempy docs. The shape of `scores` is
# reused below to size the MCR-ALS initial guess.
scores = pca.transform()
scores

In [None]:
# Plot the input spectra. Assigning to `_` keeps the object returned by plot()
# from being echoed under the figure, as the other spectrochempy plot cells do.
_ = X.plot()

In [None]:
# Initial guess of the concentration profiles for MCR-ALS: uniform random values
# in [0, 1) with the same shape as the PCA scores.
# The generator is seeded so that the decomposition, and the CSV files exported
# from it, are the same on every "Restart & Run All".
# Alternative starting point: use the PCA scores directly (`guess = scores`).
RANDOM_SEED = 42
guess = np.random.default_rng(RANDOM_SEED).uniform(0, 1, scores.shape)
print(X.shape, guess.shape)

Create an MCR-ALS object with non-negativity constraints on both the concentrations and the spectra, using an NNLS solver for the concentrations.
Setting `log_level="INFO"` prints a summary of the optimization steps.



In [None]:
# Configure MCR-ALS. Going by the argument names, non-negativity is requested for
# all concentration and all spectral profiles, with an NNLS solver for the
# concentration step; log_level="INFO" turns on informational logging.
# NOTE(review): confirm the exact option semantics against the spectrochempy MCRALS docs.
mcr = scp.MCRALS(nonnegConc="all", nonnegSpec="all", solverConc="nnls", log_level="INFO")
# Fit X, starting from `guess` (the initial concentration estimate built above).
mcr.fit(X=X, Y=guess)

# --- Disabled reference code ------------------------------------------------
# The lines below sketch the same kind of fit with a `McrAR` estimator, which is
# not imported in this notebook. They are kept for reference only.
# # Note constraint order matters
# mcrar = McrAR(max_iter=100, st_regr='NNLS', c_regr='OLS',
#               c_constraints=[ConstraintNonneg(), ConstraintNorm()])

# # Equivalent to

# # Note constraint order matters
# mcrar = McrAR(max_iter=100, st_regr=NNLS(), c_regr=OLS(),
#               c_constraints=[ConstraintNonneg(), ConstraintNorm()])

# # Otherwise, if you have an initial estimate of the concentrations
# mcrar.fit(X.values, C=guess)

In [None]:
# Resolved concentration profiles: print their shape, then plot the transposed
# dataset. `_ =` keeps plot()'s return value from being echoed under the figure.
print(mcr.C.shape)
_ = mcr.C.T.plot()

In [None]:
# Resolved spectral profiles: print their shape, then plot them.
# `_ =` keeps plot()'s return value from being echoed under the figure.
print(mcr.St.shape)
_ = mcr.St.plot()

In [None]:
# NOTE(review): this repeats the St plot from the previous cell; here the value
# returned by plot() is also echoed because it is not assigned to `_`.
mcr.St.plot()

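Finally, the reconstructed dataset (X_hat = C St) can be compared with the original dataset (X)
and the residuals. The fit is good and comparable with the original paper. The cells below export the resolved spectra (St) and concentrations (C) to CSV files.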



In [None]:
# Export the resolved spectral profiles (St) to CSV: one row per wavenumber and
# one COMP<k> column per resolved component.
# Column names are generated from the actual number of components, so the export
# keeps working if the number of components changes.
corrected_spectra_denoised_minmax_new_st = mcr.St.data.transpose()
n_components = corrected_spectra_denoised_minmax_new_st.shape[1]

st_table = pd.DataFrame(
    corrected_spectra_denoised_minmax_new_st,
    columns=[f"COMP{k + 1}" for k in range(n_components)],
)
# Wavenumbers are written as floats; insert() raises if their count does not
# match the number of rows in St.
st_table.insert(0, "Wavenumber", datasets["Wavenumber"].to_numpy(dtype=float))
st_table.to_csv(
    f"{OUTPUT_DIR}corrected_spectra_denoised_minmax_new_st.csv",
    index=False,
    na_rep="nan",  # write missing values as "nan" rather than an empty field
)

In [None]:
# Reload the exported spectral profiles and draw one panel per column,
# with Wavenumber on the x axis.
st_csv_path = f"{OUTPUT_DIR}corrected_spectra_denoised_minmax_new_st.csv"
sdf = pd.read_csv(st_csv_path)
sdf_by_wavenumber = sdf.set_index("Wavenumber")
sdf_by_wavenumber.plot(subplots=True, figsize=(15,20));

In [None]:
# Save the resolved concentration profiles, first as they are and then normalised.
n_profiles = mcr.C.shape[1]
columns = [f"Components{idx + 1}" for idx in range(n_profiles)]

raw_conc_csv = f"{OUTPUT_DIR}corrected_spectra_denoised_minmax_new_conc.csv"
conc_table = pd.DataFrame(mcr.C.values, columns=columns)
conc_table.to_csv(raw_conc_csv, index=False)

# Divide by `mcr.C.max()` and round to 3 decimals before exporting and plotting.
normC = np.round(mcr.C / mcr.C.max(), 3)
norm_conc_csv = f"{OUTPUT_DIR}corrected_spectra_denoised_minmax_new_conc_normalized.csv"
norm_table = pd.DataFrame(normC.values, columns=columns)
norm_table.to_csv(norm_conc_csv, index=False)
normC.T.plot(xlabel="Samples")