In [1]:
# Install SciPy into the running kernel's environment; scipy.io.loadmat is
# used in a later cell to read the .mat data file.
%pip install scipy

Note: you may need to restart the kernel to use updated packages.


In [20]:
from scipy.io import loadmat

# Read the MATLAB file into a dict keyed by variable name, then list the keys
# so the name of the stored data variable can be identified.
salinas_data = loadmat(file_name='SalinasA.mat')
print(salinas_data.keys())

dict_keys(['__header__', '__version__', '__globals__', 'salinasA'])


In [21]:
# Pull the array stored under 'salinasA' out of the loaded .mat dict; it is the
# only key in that dict that is not a double-underscore metadata entry.
data_variable = salinas_data['salinasA']

In [22]:
from sklearn.decomposition import PCA
import numpy as np

# Flatten the 3-axis (M, N, D) array into an (M*N, D) matrix so that every
# position along the first two axes becomes one sample with D features, which
# is the samples-by-features layout PCA.fit_transform expects.
M, N, D = data_variable.shape
HSI_data_reshaped = data_variable.reshape((M * N, D))

# Project onto the first three principal components. The seed is fixed (same
# value as the NMF cell) so the result is reproducible if scikit-learn's
# svd_solver='auto' selects a randomized SVD solver for this data shape.
pca = PCA(n_components=3, random_state=42)
principal_components = pca.fit_transform(HSI_data_reshaped)

# Preview only the first 10 projected samples rather than the whole array.
print(principal_components[:10])

[[18841.82446926 -1147.70708616   112.18567526]
 [19027.82954708 -1291.64795427    64.91330149]
 [18833.91716613 -1280.21119012   125.9359714 ]
 [18724.82301408 -1296.96525322   157.42241387]
 [18706.97608258 -1217.19301651   316.26224993]
 [18564.18000585  -991.48465237   499.2983201 ]
 [18179.01223604  -683.40216669   594.40150618]
 [17756.09120054  -456.62519289   660.38539209]
 [17416.30362116  -298.68681024   716.03799754]
 [17241.84264386  -250.58258745   733.67746855]]


In [23]:
from sklearn.decomposition import NMF

# NMF only accepts non-negative input, so floor any negative entries at zero
# before factorizing (the array is left untouched when nothing is negative).
if (HSI_data_reshaped < 0).any():
    HSI_data_reshaped = HSI_data_reshaped.clip(min=0)

# Rank-3 factorization X ~ W H with a seeded random initialisation:
# W is the transformed data returned by fit_transform, H the fitted components.
nmf_model = NMF(init='random', n_components=3, random_state=42)
W = nmf_model.fit_transform(HSI_data_reshaped)
H = nmf_model.components_

# Rebuild the rank-3 approximation from the two factors and display it.
best_approximation = W.dot(H)
print("Best rank 3 NMF approximation:", best_approximation)



Best rank 3 NMF approximation: [[392.83990262 493.61394406 689.63615984 ...   2.05574696   2.5174579
    2.01488681]
 [395.69581836 496.96603735 694.08118675 ...   2.06420331   2.5278135
    2.02317507]
 [393.95491195 494.92145313 691.38314776 ...   2.08084334   2.55700362
    2.03664493]
 ...
 [405.01895534 527.01794412 768.32521849 ...  23.38289584  37.01879839
   20.21679052]
 [403.60162492 525.36072907 766.16762392 ...  23.42873407  37.09506203
   20.25523177]
 [400.65537067 521.59987964 760.73926084 ...  23.23876188  36.79260096
   20.09153174]]




In [26]:
# Frobenius norm of the residual between the (non-negative) data matrix and
# its rank-3 NMF reconstruction.
reconstruction_error = np.linalg.norm(
    HSI_data_reshaped - best_approximation,
    ord='fro',
)
print("Reconstruction error for best rank 3 NMF approximation:", reconstruction_error)


Reconstruction error for best rank 3 NMF approximation: 57631.552483475934


In [27]:
# Print the heading and then (at most) the first three rows of the NMF
# component matrix H, each on its own line.
print("First few components from NMF (H matrix):", H[:3], sep="\n")

First few components from NMF (H matrix):
[[5.56602772e+00 7.26190504e+00 1.04158037e+01 1.71963055e+01
  2.15570417e+01 2.24421866e+01 2.10377516e+01 2.27226786e+01
  2.51612720e+01 2.52142738e+01 2.43197226e+01 2.37412709e+01
  2.18959863e+01 2.16300504e+01 2.12120515e+01 2.05753639e+01
  2.22536644e+01 2.34103022e+01 2.35828272e+01 2.26745542e+01
  2.06154142e+01 1.85630128e+01 1.70407845e+01 1.58477802e+01
  1.49788564e+01 1.37123752e+01 1.26184593e+01 1.19704934e+01
  1.12591057e+01 1.04930497e+01 1.05164469e+01 1.02975649e+01
  1.05014891e+01 1.01927155e+01 9.18958998e+00 8.72245003e+00
  1.38935472e+01 2.58325367e+01 4.25356452e+01 7.45983681e+01
  1.09018443e+02 1.28739067e+02 8.07912257e+01 1.24845881e+02
  1.35229129e+02 1.30173006e+02 1.27078176e+02 1.21107353e+02
  1.02167335e+02 1.05523109e+02 1.12549827e+02 1.12369416e+02
  1.08554279e+02 1.06174232e+02 1.05604917e+02 1.02466707e+02
  8.57433564e+01 7.70247317e+01 7.08636274e+01 7.10578024e+01
  3.21344510e+01 3.22108413e

(c) PCA finds orthogonal directions that capture the most variance in the data, and its components and scores may contain both positive and negative values. NMF instead factors the data into non-negative parts, which makes the components easier to interpret. PCA orders its components by the amount of variance they explain, whereas NMF components have no inherent ordering. Use PCA for a broad overview of the data and NMF for a detailed, positive-only breakdown.

(d) NMF is well suited to HSI data in image processing because it splits the data into clear, non-negative parts, making it easier to understand what is in the images, even though PCA might do a better job of approximating the data overall.