In [None]:

import os
import random
import zipfile

import joblib
import numpy as np
import scipy.io as sio
import torch
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset

# Seed every random number generator the notebook draws from so that a
# Restart Kernel -> Run All reproduces the same split, flips and batch order.
seed = 42
random.seed(seed)        # Python RNG: decides the random flips in HSIDataset
np.random.seed(seed)     # NumPy global RNG
torch.manual_seed(seed)  # PyTorch RNG: decides DataLoader(shuffle=True) ordering

In [None]:
# Read the two MATLAB files: the hyperspectral cube first, then its label map.
data_mat, gt_mat = (
    sio.loadmat(mat_file)
    for mat_file in ("Indian_pines_corrected.mat", "Indian_pines_gt.mat")
)

# Pull the arrays out of the loaded dictionaries by key.
gt = gt_mat['indian_pines_gt']
data = data_mat['indian_pines_corrected']

print("data:", data.shape, "gt:", gt.shape)

data: (145, 145, 200) gt: (145, 145)


In [None]:
# Number of principal components kept for every pixel.
n_components = 30

# Flatten the (H, W, B) cube into one row per pixel with one column per band.
H,W,B = data.shape
data_reshaped = data.reshape(-1, B).astype(np.float32)
# Standardize each band column; the small epsilon keeps the divisor non-zero
# for a band whose standard deviation is 0.
band_mean = data_reshaped.mean(axis=0)
band_std  = data_reshaped.std(axis=0) + 1e-12
data_norm = (data_reshaped - band_mean) / band_std

# NOTE(review): the band statistics and the PCA are fitted on every pixel of
# the scene, including pixels that the later split assigns to the test set —
# confirm this is intended.
pca = PCA(n_components=n_components, whiten=True, random_state=seed)
data_pca_flat = pca.fit_transform(data_norm)    
# Restore the spatial layout: (H, W, n_components).
data_pca = data_pca_flat.reshape(H, W, n_components)

# Persist the fitted PCA and the normalization statistics in one joblib file.
joblib.dump({'pca': pca, 'mean': band_mean, 'std': band_std}, 'pca_and_norm.joblib')

print("After PCA:", data_pca.shape)

After PCA: (145, 145, 30)


In [None]:
# Cut one square patch around every labelled pixel of the PCA-reduced cube.
patch_size = 25
pad = patch_size // 2
# Side length actually extracted around a centre pixel (equals patch_size
# whenever patch_size is odd).
window = 2 * pad + 1

# Zero-pad the spatial borders so patches centred on edge pixels are full size.
data_pad = np.pad(data_pca, ((pad, pad), (pad, pad), (0, 0)), mode='constant')
gt_pad = np.pad(gt, ((pad, pad), (pad, pad)), mode='constant')
H0, W0 = gt.shape  # not used below; retained in case other cells reference it

# Label 0 is skipped. The padded border is also 0, so np.nonzero returns only
# centres inside the original image, in row-major order (the same order a
# row-by-row, column-by-column scan would visit them).
centre_rows, centre_cols = np.nonzero(gt_pad)
labels = gt_pad[centre_rows, centre_cols].astype(int)

# Preallocate the output instead of growing a Python list and copying it with
# np.array afterwards, which roughly doubles peak memory for a set this large.
patches = np.empty(
    (labels.size, window, window, data_pad.shape[-1]), dtype=np.float32
)
for k, (i, j) in enumerate(zip(centre_rows, centre_cols)):
    patches[k] = data_pad[i - pad:i + pad + 1, j - pad:j + pad + 1, :]

print("Extracted patches:", patches.shape, "labels:", np.unique(labels))

Extracted patches: (10249, 25, 25, 30) labels: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16]


In [None]:

# Per-class split: 10% of each class (at least one sample) goes to training,
# the remainder of that class goes to testing.
train_idx_list, test_idx_list = [], []
unique_labels = np.unique(labels)

for c in unique_labels:
    idx_c = np.flatnonzero(labels == c)
    n_train = max(1, int(0.10 * idx_c.size))
    tr_idx, te_idx = train_test_split(
        idx_c, train_size=n_train, random_state=seed, shuffle=True
    )
    train_idx_list.append(tr_idx)
    test_idx_list.append(te_idx)

train_idx, test_idx = (
    np.concatenate(parts) for parts in (train_idx_list, test_idx_list)
)

# Labels are shifted down by one so they start at 0 (label 0 was skipped
# during patch extraction).
X_train, y_train = patches[train_idx], labels[train_idx] - 1
X_test, y_test = patches[test_idx], labels[test_idx] - 1

# The training indices were concatenated class by class; shuffle them once
# with a dedicated, seeded generator.
perm = np.random.RandomState(seed).permutation(len(X_train))
X_train, y_train = X_train[perm], y_train[perm]

print("Train:", X_train.shape, y_train.shape)
print("Test:", X_test.shape, y_test.shape)

Train: (1018, 25, 25, 30) (1018,)
Test: (9231, 25, 25, 30) (9231,)


In [None]:
class HSIDataset(Dataset):
    """Serve image patches and labels as PyTorch tensors.

    Parameters
    ----------
    X : np.ndarray
        Patches; ``X[idx]`` must be a 3-D array whose last axis is moved to
        the front (e.g. ``(height, width, channels)`` becomes
        ``(channels, height, width)``).
    y : sequence
        One label per patch; ``len(y)`` defines the dataset length.
    augment : bool, default False
        When True, each of the two trailing axes of the transposed patch is
        flipped independently with probability 0.5.
    """

    def __init__(self, X, y, augment=False):
        self.X = X
        self.y = y
        self.augment = augment

    def __len__(self):
        """Return the number of samples (taken from the labels)."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return ``(x, y)`` for sample ``idx``.

        ``x`` is a float32 tensor with a leading singleton axis, i.e.
        ``(1, channels, height, width)``; ``y`` is an int64 scalar tensor.
        """
        x = self.X[idx].transpose(2, 0, 1)
        if self.augment:
            if random.random() > 0.5:
                x = np.flip(x, axis=1)
            if random.random() > 0.5:
                x = np.flip(x, axis=2)
        # np.flip returns a view with negative strides, which torch.from_numpy
        # rejects with a ValueError. Materialize a contiguous copy first; this
        # also keeps the returned tensor from aliasing the dataset's storage.
        x = np.ascontiguousarray(x)
        x = torch.from_numpy(x).unsqueeze(0).float()
        y = torch.tensor(self.y[idx]).long()
        return x, y

# Wrap the split arrays; only the training set gets random flips.
train_dataset = HSIDataset(X=X_train, y=y_train, augment=True)
test_dataset = HSIDataset(X=X_test, y=y_test)

# Training batches are reshuffled; evaluation batches keep their order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=64)

In [None]:

os.makedirs("preprocessedd", exist_ok=True)
np.save("preprocessedd/X_train.npy", X_train)
np.save("preprocessedd/y_train.npy", y_train)
np.save("preprocessedd/X_test.npy", X_test)
np.save("preprocessedd/y_test.npy", y_test)

!zip -r hsi_data_pca.zip preprocessedd
print("Saved and zipped preprocessedd/hsi_data_pca.zip")

  adding: preprocessedd/ (stored 0%)
  adding: preprocessedd/y_train.npy (deflated 88%)
  adding: preprocessedd/y_test.npy (deflated 100%)
  adding: preprocessedd/X_test.npy (deflated 12%)
  adding: preprocessedd/X_train.npy (deflated 12%)
Saved and zipped preprocessedd/hsi_data_pca.zip


In [28]:
# NOTE(review): this archives the same preprocessedd/ directory again under a
# second name (hsi_data.zip, in addition to hsi_data_pca.zip from the previous
# cell) — confirm both archives are needed.
!zip -r hsi_data.zip preprocessedd

  adding: preprocessedd/ (stored 0%)
  adding: preprocessedd/y_train.npy (deflated 88%)
  adding: preprocessedd/y_test.npy (deflated 100%)
  adding: preprocessedd/X_test.npy (deflated 12%)
  adding: preprocessedd/X_train.npy (deflated 12%)
