In [1]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:1024"

In [9]:
mask_filepath = "./FX17/Masks/HSI1_Test"
data_filepath = "./FX17/Data/HSI1_Test"

In [3]:
#Factor is saing that the image should be divided into 9 sections 3*3 = 9 (kinda like 3 rows 3 columns)
import numpy as np
def section(img , factor = 3):
    secs = []

    #This basicaly tests if the image can actually get divided into equal sections
    if (img.shape[0] % factor != 0):
        return False

    #number of pixel in each row and column of the sections
    pix_num = int(img.shape[0] / factor)

    ptr_x_a = 0
    ptr_x_b = pix_num


    for i in range(factor):

        ptr_y_a = 0
        ptr_y_b = pix_num
        
        for j in range(factor):

            secs.append(img[ptr_x_a :ptr_x_b , ptr_y_a : ptr_y_b])
            ptr_y_a += pix_num
            ptr_y_b += pix_num
    
        ptr_x_a += pix_num
        ptr_x_b += pix_num
    
    return np.array(secs)

In [4]:
from scipy.signal import savgol_filter
def preprocess(input_data):
    data_preprocess = np.zeros_like(input_data)
    data_preprocess = savgol_filter(input_data, 15, 2)
    data_preprocess = np.gradient(data_preprocess, axis = 2)
    data_preprocess = (data_preprocess - np.mean(data_preprocess))/np.std(data_preprocess)
    
    return data_preprocess

In [18]:
#Pixel level classification
from spectral import imshow, view_cube, save_rgb
import spectral.io.envi as envi
import numpy as np
import json
import os
from PIL import Image
import numpy as np

def mask2rgb(mask):
    rgb = np.zeros(mask.shape+(3,), dtype=np.uint8)
    
    for i in np.unique(mask):
            rgb[mask==i] = LABEL_TO_COLOR[i]
            
    return rgb

def rgb2mask(rgb):
    mask = np.zeros((rgb.shape[0], rgb.shape[1]))
    for k,v in LABEL_TO_COLOR.items():
        mask[np.all(rgb==v, axis=2)] = k
        
    return mask

LABEL_TO_COLOR =  {0:[0,0,0], 1:[255,0,0], 2:[0,255,0], 3:[0,0,255], 4:[255,255,0], 5:[255,0,255], 6:[0,255,255], 7: [255,255,128], 8:[255,128,255], 9:[128,255,255]}

filenames = os.listdir(data_filepath)
train_hsi_pixel_data = []
train_hsi_pixel_label = []

for i in range(len(filenames)):
    filename = filenames[i]
    maskname = filename+"_mask.png"
    print(f"computing for {filename}")
    print(f"computing for mask {maskname}")
    dark_ref = envi.open(data_filepath + '/' + filename + '/capture/DARKREF_' + filename + '.hdr', data_filepath + '/' + filename + '/capture/DARKREF_' + filename + '.raw')
    white_ref = envi.open(data_filepath + '/' + filename + '/capture/WHITEREF_' + filename + '.hdr', data_filepath + '/' + filename + '/capture/WHITEREF_' + filename + '.raw')
    data_ref = envi.open(data_filepath + '/' + filename + '/capture/' + filename + '.hdr', data_filepath + '/' + filename + '/capture/' + filename + '.raw')
    
    white_nparr = np.array(white_ref.load())
    dark_nparr = np.array(dark_ref.load())
    data_nparr = np.array(data_ref.load())
    corrected_nparr = np.divide(
        np.subtract(data_nparr, np.mean(dark_nparr, axis = 0)),
        np.subtract(np.mean(white_nparr, axis = 0), np.mean(dark_nparr, axis = 0)))
    
    if corrected_nparr.shape[0] != 640:
        #print(corrected_nparr.shape[0])
        corrected_nparr = np.concatenate((corrected_nparr,corrected_nparr[-1].reshape(1,640,224)), axis=0)
    
    corrected_nparr = preprocess(corrected_nparr[:,:,8:208])
    print(f"corrected_nparr shape {corrected_nparr.shape}")
    
    img = Image.open(mask_filepath + "/" + maskname)
    mask = np.array(img)
    print(f"mask shape {mask.shape}")
    
    for i in range(mask.shape[0]):
        for j in range(mask.shape[1]):
            if mask[i][j][0] == 255 and mask[i][j][1] == 0 and mask[i][j][2] == 0:
                train_hsi_pixel_data.append(corrected_nparr[i][j])
                train_hsi_pixel_label.append(np.eye(9)[0])
            elif mask[i][j][0] == 0 and mask[i][j][1] == 255 and mask[i][j][2] == 0:
                train_hsi_pixel_data.append(corrected_nparr[i][j])
                train_hsi_pixel_label.append(np.eye(9)[1])
            elif mask[i][j][0] == 0 and mask[i][j][1] == 0 and mask[i][j][2] == 255:
                train_hsi_pixel_data.append(corrected_nparr[i][j])
                train_hsi_pixel_label.append(np.eye(9)[2])
            elif mask[i][j][0] == 255 and mask[i][j][1] == 255 and mask[i][j][2] == 0:
                train_hsi_pixel_data.append(corrected_nparr[i][j])
                train_hsi_pixel_label.append(np.eye(9)[3])
            elif mask[i][j][0] == 255 and mask[i][j][1] == 0 and mask[i][j][2] == 255:
                train_hsi_pixel_data.append(corrected_nparr[i][j])
                train_hsi_pixel_label.append(np.eye(9)[4])
            elif mask[i][j][0] == 0 and mask[i][j][1] == 255 and mask[i][j][2] == 255:
                train_hsi_pixel_data.append(corrected_nparr[i][j])
                train_hsi_pixel_label.append(np.eye(9)[5])
            elif mask[i][j][0] == 255 and mask[i][j][1] == 255 and mask[i][j][2] == 128:
                train_hsi_pixel_data.append(corrected_nparr[i][j])
                train_hsi_pixel_label.append(np.eye(9)[6])
            elif mask[i][j][0] == 255 and mask[i][j][1] == 128 and mask[i][j][2] == 255:
                train_hsi_pixel_data.append(corrected_nparr[i][j])
                train_hsi_pixel_label.append(np.eye(9)[7])
            elif mask[i][j][0] == 128 and mask[i][j][1] == 255 and mask[i][j][2] == 255:
                train_hsi_pixel_data.append(corrected_nparr[i][j])
                train_hsi_pixel_label.append(np.eye(9)[8])


computing for FX17_HSI1_0003
computing for mask FX17_HSI1_0003_mask.png
corrected_nparr shape (640, 640, 200)
mask shape (640, 640, 3)
computing for FX17_HSI1_0036
computing for mask FX17_HSI1_0036_mask.png
corrected_nparr shape (640, 640, 200)
mask shape (640, 640, 3)
computing for FX17_HSI1_0038
computing for mask FX17_HSI1_0038_mask.png
corrected_nparr shape (640, 640, 200)
mask shape (640, 640, 3)
computing for FX17_HSI1_0026
computing for mask FX17_HSI1_0026_mask.png
corrected_nparr shape (640, 640, 200)
mask shape (640, 640, 3)
computing for FX17_HSI1_0014
computing for mask FX17_HSI1_0014_mask.png
corrected_nparr shape (640, 640, 200)
mask shape (640, 640, 3)
computing for FX17_HSI1_0029
computing for mask FX17_HSI1_0029_mask.png
corrected_nparr shape (640, 640, 200)
mask shape (640, 640, 3)


In [23]:
len(train_hsi_pixel_data)

607555

In [24]:
# For both train and test the name is train_hsi_pixel_data
test_pixels = np.array(train_hsi_pixel_data)
test_labels = np.array(train_hsi_pixel_label)

In [25]:
test_labels.shape

(607555, 9)

In [26]:
test_pixels.shape

(607555, 200)

In [27]:
import torch
from torch.utils.data import Dataset, DataLoader
class makeDataset(Dataset):
    def __init__(self, X, Y): 
        self.x = X
        self.y = Y
        #self.i = index
    
    def __len__(self):
        return len(self.x)
    
    def __getitem__(self, index):
        feature = self.x[index, :]
        label = self.y[index, :]
        #i = self.i[index]
        
        feature = torch.tensor(feature, dtype=torch.float32)
        label = torch.tensor(label, dtype=torch.float32)
        return {
            'features': feature,
            'labels' : label,
            #'index' : i
        }

In [55]:
test_dataset = makeDataset(test_pixels, test_labels)
test_loader = DataLoader(test_dataset, batch_size = 128, shuffle=True)

In [29]:
#Pixel Level Classification
import torch
from torch import nn
import torch.nn.functional as F

class DeepSpectra(nn.Module):
    def __init__(self, num_classes):
        super(DeepSpectra, self).__init__()
        self.num_classes = num_classes
        self.pre = pre_block(sampling_point = 200)
        self.conv1 = conv_block(
            in_channels=1,
            out_channels=8,
            kernel_size=3,
            stride=1,
            padding = 1
        )
        self.conv2 = nn.Conv1d(8, 16, 3, 1, 1)

        # In this order: in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool
        self.inception1 = Naive_inception_block(16, 8, 8, 8, 8)
        #self.inception2 = Naive_inception_block(32, 16, 16, 16, 16)
        self.fc1 = nn.Linear(50*32, 128)
        self.dropout = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(128, self.num_classes)
        self.bn1 = nn.BatchNorm1d(128)

    def forward(self, x):
        x = self.pre(x)
        x = F.relu(self.conv1(x))
        x = F.max_pool1d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool1d(x, 2)
        x = self.inception1(x)
        #x = self.inception2(x)
        x = x.view(-1, 50*32)
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.fc2(x)
        return F.softmax(x)


class Naive_inception_block(nn.Module):
    def __init__(
        self, in_channels, out_1x1, out_3x3, out_5x5, out_1x1pool):
        super(Naive_inception_block, self).__init__()
        self.branch1 = conv_block(in_channels, out_1x1, kernel_size=1)

        self.branch2 = conv_block(in_channels, out_3x3, kernel_size=3, padding=1)

        self.branch3 = conv_block(in_channels, out_5x5, kernel_size=5, padding=2)
        
        self.branch4 = nn.Sequential(
            nn.MaxPool1d(kernel_size=3, stride=1, padding=1),
            conv_block(in_channels, out_1x1pool, kernel_size=1),
        )
        
    def forward(self, x):
        return torch.cat(
            [self.branch1(x), self.branch2(x), self.branch3(x), self.branch4(x)], 1
        )
        
    
class Inception_block(nn.Module):
    def __init__(
        self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool
    ):
        super(Inception_block, self).__init__()
        self.branch1 = conv_block(in_channels, out_1x1, kernel_size=1)

        self.branch2 = nn.Sequential(
            conv_block(in_channels, red_3x3, kernel_size=1),
            conv_block(red_3x3, out_3x3, kernel_size=3, padding=1),
        )

        self.branch3 = nn.Sequential(
            conv_block(in_channels, red_5x5, kernel_size=1),
            conv_block(red_5x5, out_5x5, kernel_size=5, padding=2),
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool1d(kernel_size=3, stride=1, padding=1),
            conv_block(in_channels, out_1x1pool, kernel_size=1),
        )

    def forward(self, x):
        return torch.cat(
            [self.branch1(x), self.branch2(x), self.branch3(x), self.branch4(x)], 1
        )

class conv_block(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(conv_block, self).__init__()
        self.relu = nn.ReLU()
        self.conv = nn.Conv1d(in_channels, out_channels, **kwargs)
        self.batchnorm = nn.BatchNorm1d(out_channels)

    def forward(self, x):
        return self.relu(self.conv(x))
    
class pre_block(nn.Module):
    def __init__(self, sampling_point):
        super().__init__()
        self.pool1 = nn.AvgPool1d(kernel_size = 5, stride = 1, padding = 2)
        self.pool2 = nn.AvgPool1d(kernel_size = 13, stride = 1, padding = 6)
        self.pool3 = nn.AvgPool1d(kernel_size = 7, stride = 1, padding = 3)
        self.ln = nn.LayerNorm(sampling_point)
        
    def forward(self, x):
        x = self.pool1(x)
        x = self.pool2(x)
        x = self.pool3(x)
        x = self.ln(x)
        
        return x

In [43]:
#Pixel Level Classification
from tqdm import tqdm
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("Running on a GPU")
else:
    device = torch.device("cpu")
    print("Running on a CPU")
    
def initialize_weights(m):
    if isinstance(m, nn.Conv1d):
        nn.init.xavier_uniform_(m.weight.data)
        
def train(model, dataloader, optimizer, loss_fn):
    print("training started")
    model.train()
    counter = 0
    train_running_loss = 0.0
    train_running_acc=0.0
    for i, data in tqdm(enumerate(dataloader)):
        counter += 1
        # print(f"class type of input data {type(data)}")
        # extract the features and labels
        features = data['features'].view(-1,1,200).to(device)
        # print(features.size())
        labels = data['labels'].to(device)
        
        # zero-out the optimizer gradients
        optimizer.zero_grad()
        outputs = model(features)
        #outputs = model(features, labels, infer = False)
        loss = loss_fn(outputs, labels)
        matches = [torch.argmax(i) == torch.argmax(j) for i, j in zip(outputs,labels)]
        acc = matches.count(True)/len(matches)
        train_running_loss += loss.item()
        train_running_acc += acc
        
        # backpropagation
        loss.backward()
        # update optimizer parameters
        optimizer.step()
        
    train_loss = train_running_loss / counter
    train_acc = train_running_acc / counter   
    print(train_acc) 
    return train_loss, train_acc

Running on a GPU


In [31]:
def eval(dataloader, model, loss_fn):
    model.eval()
    counter = 0
    running_loss = 0.0
    running_acc=0.0
    matches = []
    mislabel =[]
    with torch.no_grad():
        for data in tqdm(dataloader):
            counter += 1
            # extract the features and labels
            features = data['features'].view(-1,1,200).to(device)
            labels = data['labels'].to(device)
            #index = data['index'].to(device)
            outputs = model(features)
            loss = loss_fn(outputs, labels)
            running_loss += loss.item()
            matches = [torch.argmax(i) == torch.argmax(j) for i, j in zip(outputs,labels)]
            acc = matches.count(True)/len(matches)
            running_acc += acc
            
    loss = running_loss / counter
    accuracy = running_acc / counter  
    return accuracy, loss

In [56]:
import torch.optim as optim
num_class = 9
epochs = 2
model = DeepSpectra(num_class).to(device)
optimizer = optim.Adam(params=model.parameters(), lr=0.01)
loss_fn = nn.CrossEntropyLoss()
train_loss, train_acc = train(model, test_loader, optimizer, loss_fn)


training started


0it [00:00, ?it/s]

  return F.softmax(x)
4747it [01:08, 69.07it/s]

0.9657974773541184



