Challenge (5 point)

Improving classification spatial awareness by taking into account neighbouring pixels. This will involve:

- Creation and training of a 2D CNN classifier within a Jupyter notebook which takes into account the neighbouring pixels of HSI data.
- Test at least two 2D convolution filters sizes (e.g. 3x3, 5x5) and outline their corresponding performances.
- Tune the hyperparameters in order to achieve the optimal trade-off between model performance and training time.
- Present the output from you 2D CNN and compare with cross-correlation, DT, 1D CNN or MLP.
- Discuss potential future avenues for improving classification accuracy

In [103]:
from scipy.io import loadmat
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import csv
import time
import sys
import torch
import torchmetrics  # metrics like accuracy, recall, etc
import torch.nn as nn
# from torch import nn
from torch.nn.modules.container import Sequential
import torch.nn.functional as F


from sklearn import decomposition, datasets
from sklearn import tree
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, cross_val_score, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from copy import deepcopy
from sklearn.neural_network import MLPRegressor


DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')
print("we can use:", DEVICE, "to run the Model ")

we can use: cuda to run the Model 


In [104]:

#dataset_path = sys.argv[1];
#folder = sys.argv[2];
#filename = sys.argv[3];

dataset_path = "MC_Modeled_spectra.csv"; # monte carlo dataset 
folder = "Data_Mike" + "/";
filename = "R_(1measurement)_normalized_NoGap[202,202,79].mat"; # hyperspectral image(pixels, wavelengths)


HSI_mat_data_file_to_load = folder + filename; # HSI file
# copy from the given code, it will be saved by using np.save() with the format of .npy
saved_Results_file = folder + filename + dataset_path; 
provided_npy_file = "R_(1measurement)_normalized_NoGap[202,202,79].matData_small.csv.npy"
Results_from_given_npy = np.load(provided_npy_file, allow_pickle=True)
print("Results.shape", Results_from_given_npy.shape)

Hyperspectra_data = loadmat(HSI_mat_data_file_to_load)['R1'];
(x_total, y_total, wavelenght) = Hyperspectra_data.shape;
print("Hyperspectral data: x_total:", x_total, "y_total:", y_total, "wavelengh:", wavelenght);

Hyperspectra_data_resized = Hyperspectra_data[:,:, 0:42:2]; # Get from 510 to 720 nm with 10nm step size
print(f"Resized Hyperspectra data shape : {Hyperspectra_data_resized.shape}")


Results.shape (202, 202, 11)
Hyperspectral data: x_total: 202 y_total: 202 wavelengh: 79
Resized Hyperspectra data shape : (202, 202, 21)


In [105]:
def mse_loss(y_pred, y_true):
    squared_error = (y_pred - y_true) ** 2
    sum_squared_error = np.sum(squared_error)
    mse_loss = sum_squared_error / y_true.size
    sse = sum((y_true - y_pred)**2)
    tse = (len(y_true) - 1) * np.var(y_true, ddof=1)
    r2_score = 1 - (sse / tse)    
    return mse_loss, r2_score, sse, tse

Construct the 2D CNN model

- Creation and training of a 2D CNN classifier within a Jupyter notebook which takes into account the neighbouring pixels of HSI data.
- Test at least two 2D convolution filters sizes (e.g. 3x3, 5x5) and outline their corresponding performances.
- Tune the hyperparameters in order to achieve the optimal trade-off between model performance and training time.
- Present the output from you 2D CNN and compare with cross-correlation, DT, 1D CNN or MLP.
- Discuss potential future avenues for improving classification accuracy

In [106]:
# # https://www.kaggle.com/artgor/simple-eda-and-model-in-pytorch/notebook
# # https://www.mashen.zone/thread-1825047.htm
# # https://blog.csdn.net/nanke_4869/article/details/113458729
# # CNN model
# something has to be commented out since no enough GPU memory in Colab
class CNN(nn.Module):
  """
https://datascience.stackexchange.com/questions/40906/determining-size-of-fc-layer-after-conv-layer-in-pytorch
  https://towardsdatascience.com/classification-of-fruit-images-using-neural-networks-pytorch-1d34d49342c7
  """
  def __init__(self):
      super().__init__()
      self.network = nn.Sequential(
          nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),
          nn.BatchNorm2d(16),
          nn.ReLU(inplace=True),
          nn.MaxPool2d(kernel_size=2, stride=2),
          nn.Dropout(0.25),
          nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
          nn.BatchNorm2d(32),
          nn.ReLU(inplace=True),
          
          
        #   nn.Linear(32 * 101 * 101, 128),
          nn.ReLU(inplace=True),
          nn.Dropout(0.5),
        #   nn.Linear(128, 2)
          
      )
      
  def forward(self, xb):
      return self.network(xb)


  def to_model_string(self):
      return 'CNN'


In [107]:
def get_data_loader(ds, y_name, batch_size):
    X = ds.iloc[:, 46:67].values
    y = ds[y_name].values
    # add 3 zerovalued columns to X , to make it 25 columns
    # so 25 columns can be reshaped to 5x5 matrix and can be used as input to CNN
    X = np.hstack((X, np.zeros((X.shape[0], 4))))
    # X = X.reshape(-1, 1, 5, 5)
    print(f"X.shape: {X.shape}, y.shape: {y.shape}")
    X = torch.from_numpy(X).float()
    y = torch.from_numpy(y).float()
    dataset = torch.utils.data.TensorDataset(X, y)
    
    train_set, valid_set = train_test_split(dataset, test_size=0.2, random_state=42)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(valid_set, batch_size=batch_size, shuffle=True)
    return train_loader, test_loader

def loss_batch(model, loss_func, xb, yb, opt=None):
    # Generate predictions
    pred = model(xb)
    # Calculate loss
    loss = loss_func(pred, yb)
    if opt is not None:
        # Compute gradients
        loss.backward()
        # Update parameters
        opt.step()
        # Reset gradients
        opt.zero_grad()
    return loss.item(), len(xb)


def train_model(model, train_loader, val_loader, optimizer, loss_fn, epochs):
    model.train()
    for epoch in range(epochs):
        for batch_index, batch in enumerate(train_loader):
            X, y = batch
            X, y = X.to(DEVICE), y.to(DEVICE)
            optimizer.zero_grad()
            # print(f"X.shape: {X.shape}, y.shape: {y.shape}")
            # need to handle RuntimeError that expecteding 4D input, but got 2D input
            X = X.reshape(-1, 1, 5, 5)
            y_pred = model(X)
            loss = loss_fn(y_pred, y)
            loss.backward()
            optimizer.step()
        model.eval()
        with torch.no_grad():
            losses, nums = zip(
                *[loss_batch(model, loss_fn, X, y, opt=None) for X, y in val_loader]
            )
            val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)
        print(f"Epoch: {epoch}, val_loss: {val_loss}")
    return val_loss , model

ds = pd.read_csv(dataset_path)
mel_trainLoader, mel_testLoader = get_data_loader(ds, 'Mel', 32)
mel_model = CNN().to(DEVICE)
mel_optimizer = torch.optim.Adam(mel_model.parameters(), lr=0.001)
mel_loss_fn = nn.MSELoss()
mel_val_loss, mel_model = train_model(mel_model, mel_trainLoader, mel_testLoader, mel_optimizer, mel_loss_fn, 10)

X.shape: (218700, 25), y.shape: (218700,)


  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (2) must match the size of tensor b (32) at non-singleton dimension 3

In [None]:
ds = pd.read_csv(dataset_path)
blsdbn_trainLoader, blsdbn_testLoader = get_data_loader(ds, 'BlSDBN', 64)
blsdbn_model = CNN().to(DEVICE)
blsdbn_optimizer = torch.optim.Adam(blsdbn_model.parameters(), lr=0.001)
blsdbn_loss_fn = nn.MSELoss()


def predict(model, test_loader):
    model.eval()
    with torch.no_grad():
        y_preds = []
        for batch in test_loader:
            X, y = batch
            X, y = X.to(DEVICE), y.to(DEVICE)
            y_pred = model(X)
            y_preds.append(y_pred)
    return y_preds




