### Rady pro použití
1. Změň dataset_path - cesta do složky s daty, tato složka by měla obsahovat podsložku s obrazovými daty (MHS, CIR, RGB nebo PAN), podsložku s referenčními daty (GT) a prázdné složky pro ukládání výsledků (results) a natrénovaných modelů (models)
2. Změň use_mhs - první hodnota (True/False) zapíná použití multi/hyperspektrálních dat (MHS), druhá hodnota udává počet vstupních spektrálních pásem

In [1]:
import os
import torch
import numpy as np
import imageio
import matplotlib
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim

from time import time as time
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, jaccard_score
from sklearn.model_selection import KFold
import torchnet as tnt
import functools
import mock
from tqdm import notebook as tqdm
from distutils.dir_util import copy_tree


# GLOBAL SETTINGS
PlotSize = 12                                     # Base size of plots; the default figure is (2*PlotSize) x PlotSize
matplotlib.rcParams['figure.figsize'] = [PlotSize*2, PlotSize]
CMAP = matplotlib.colors.ListedColormap(['black', 'white', 'orange'])               # Three-colour map (one colour per class label 0, 1, 2)
np.set_printoptions(precision=2, suppress=True)  # Print arrays with 2 decimals and without scientific notation

# PATHS TO TRAIN/TEST DATA
# Root folder of the dataset. Later cells read the 'GT' and imagery
# ('CIR', 'RGB', 'PAN', 'MHS') sub-folders and write into 'models' and 'results'.
dataset_path = 'e:\\datasets\\test_unet\\Krkonose2012\\overlap'
# The number of tiles is taken from the number of entries in the GT folder
num_of_tiles = len(os.listdir(os.path.join(dataset_path, 'GT')))
print(f'Number of tiles to be processed: \n{num_of_tiles}\n')

# USE CIR, RGB, PAN DATA
# Each flag enables reading of the sub-folder with the same (upper-case) name
use_cir = False
use_rgb = False
use_pan = False

# USE multi/hyperspectral DATA (first value is a bool similar to use_rgb etc. and second value is the number of bands)
use_mhs = (True, 6)

# CIR and RGB count as 3 bands each, PAN as 1 band, MHS as use_mhs[1] bands
print(f'Total number of bands used: \n{use_cir*3 + use_rgb*3 + use_pan + use_mhs[0]*use_mhs[1]}')

# MODEL NAME... USED AS FILENAME OF SAVED MODEL AND FOR APPROPRIATE RESULTS FOLDER
model_name = 'U_Net'

Number of tiles to be processed: 
225

Total number of bands used: 
6


In [2]:
def read_imagery(img_dir):
    """Read all multi-band image patches of one folder into a single array.

    Every file in `img_dir` is read with imageio, converted to float32,
    reordered from (rows, cols, bands) to (bands, rows, cols) and divided by 255.

    Files are processed in sorted file-name order. `os.listdir` returns names in
    arbitrary order, and the patches are later paired with other folders purely
    by index, so a deterministic order is required.

    Args:
        img_dir: folder containing only the image patches.

    Returns:
        np.ndarray (float32) of shape (n_patches, bands, rows, cols).
    """
    img_list = []
    for file_name in sorted(os.listdir(img_dir)):
        img_patch = imageio.imread(os.path.join(img_dir, file_name)).astype(np.float32)
        # channels-last -> channels-first, then scale the values by 1/255
        img_list.append(img_patch.transpose([2, 0, 1]) / 255)

    return np.stack(img_list, axis=0)


def _read_single_band(img_dir):
    """Read single-band patches of a folder as a (n_patches, 1, rows, cols) float32 array."""
    band_list = []
    for file_name in sorted(os.listdir(img_dir)):
        band_patch = imageio.imread(os.path.join(img_dir, file_name)).astype(np.float32)
        # add the missing channel axis so the result can be concatenated with multi-band data
        band_list.append(np.expand_dims(band_patch / 255, axis=0))

    return np.stack(band_list, axis=0)


def _read_ground_truth(gt_dir):
    """Read the reference (label) patches of a folder as a (n_patches, rows, cols) int64 array."""
    gt_list = []
    for file_name in sorted(os.listdir(gt_dir)):
        gt_patch = imageio.imread(os.path.join(gt_dir, file_name)).astype(np.int64)
        # optional: assign 0 to classes 3 and above
        # gt_patch[gt_patch > 2] = 0
        gt_list.append(gt_patch[:, :])

    return np.stack(gt_list, axis=0)


def read_patch(root_folder, cir, rgb, pan, mhs, gt=True):
    """Read the selected imagery (and optionally the reference data) as tensors.

    All enabled inputs are concatenated along the channel axis in the order
    CIR, RGB, PAN, MHS, so the channel count equals
    ``cir*3 + rgb*3 + pan + mhs[0]*mhs[1]`` used for the network input.
    All folders are read in sorted file-name order, so patch `i` of the
    features and patch `i` of the ground truth belong to the same file name
    (the folders are expected to contain identically named files).

    Args:
        root_folder: dataset folder with the 'CIR', 'RGB', 'PAN', 'MHS' and
            'GT' sub-folders (only the enabled ones are read).
        cir: read the 'CIR' sub-folder.
        rgb: read the 'RGB' sub-folder.
        pan: read the single-band 'PAN' sub-folder.
        mhs: tuple (enabled, number_of_bands); only `mhs[0]` is used here.
        gt: also read and return the reference data from 'GT'.

    Returns:
        features (float32 tensor, (n_patches, bands, rows, cols)) when
        `gt` is False, otherwise the tuple (features, ground_truth) where
        ground_truth is an int64 tensor of shape (n_patches, rows, cols).

    Raises:
        ValueError: if no input data type is enabled.
    """
    ##########################################################
    # READ IMAGES as FLOAT
    band_groups = []
    if cir:
        band_groups.append(read_imagery(os.path.join(root_folder, 'CIR')))
    if rgb:
        band_groups.append(read_imagery(os.path.join(root_folder, 'RGB')))
    if pan:
        band_groups.append(_read_single_band(os.path.join(root_folder, 'PAN')))
    if mhs[0]:  # mhs is a (bool, n_bands) tuple; the tuple itself is always truthy
        band_groups.append(read_imagery(os.path.join(root_folder, 'MHS')))

    if not band_groups:
        raise ValueError('No valid data input.')

    if len(band_groups) == 1:
        features = band_groups[0]
    else:
        features = np.concatenate(band_groups, axis=1)
    features = torch.from_numpy(features)

    if not gt:
        return features

    ground_truth = torch.from_numpy(_read_ground_truth(os.path.join(root_folder, 'GT')))
    return features, ground_truth

In [3]:
# Read the imagery and reference tiles selected in the settings cell
data_features, data_labels = read_patch(
    dataset_path, cir=use_cir, rgb=use_rgb, pan=use_pan, mhs=use_mhs
)

print(f'Size of image data: \n{data_features.shape}\n')
print(f'Size of reference data: \n{data_labels.shape}\n')

# Wrap both tensors in a TensorDataset so that each item is a [tile, reference] pair
dataset = tnt.dataset.TensorDataset([data_features, data_labels])

Size of image data: 
torch.Size([225, 6, 256, 256])

Size of reference data: 
torch.Size([225, 256, 256])



In [4]:
# Class labels present in the reference data and the number of pixels of each
# (shows how imbalanced the classes are)
unique, counts = np.unique(data_labels, return_counts=True)
print(f'Class labels: \n{unique}\n')
print(f'Number of pixels in a class: \n{counts}')

Class labels: 
[0 1 2]

Number of pixels in a class: 
[9834937 4148500  762163]


In [5]:
class UNet(nn.Module):
    """
    U-Net for semantic segmentation

    Five-level encoder/decoder: every level applies two 3x3 convolutions
    (reflect padding, batch norm, ReLU); levels are connected by 2x2 max
    pooling in the encoder and by 2x2 transposed convolutions plus skip
    connections (channel concatenation) in the decoder. A 1x1 convolution
    produces the per-pixel class scores.
    """

    def __init__(self, n_channels, encoder_conv_width, decoder_conv_width, n_class, cuda):
        """
        initialization function
        n_channels, int, number of input channel
        encoder_conv_width, int list (10 values), size of the feature maps of convs for the encoder
        decoder_conv_width, int list (12 values), size of the feature maps of convs for the decoder
        n_class = int,  the number of classes
        cuda, bool, move the model to the GPU when true
        """
        super(UNet, self).__init__() #necessary for all classes extending the module class

        enc = encoder_conv_width
        dec = decoder_conv_width

        self.maxpool = nn.MaxPool2d(2, 2, return_indices=False) #maxpooling layer
        # Not in-place: an in-place dropout overwrites the activation produced by
        # the last ReLU, which autograd may still need for the backward pass.
        # The forward result is the same either way.
        self.dropout = nn.Dropout2d(p=0.5, inplace=False)

        #encoder (c1..c10): two conv blocks per level
        self.c1 = self._conv_block(n_channels, enc[0])
        self.c2 = self._conv_block(enc[0], enc[1])
        self.c3 = self._conv_block(enc[1], enc[2])
        self.c4 = self._conv_block(enc[2], enc[3])
        self.c5 = self._conv_block(enc[3], enc[4])
        self.c6 = self._conv_block(enc[4], enc[5])
        self.c7 = self._conv_block(enc[5], enc[6])
        self.c8 = self._conv_block(enc[6], enc[7])
        self.c9 = self._conv_block(enc[7], enc[8])
        self.c10 = self._conv_block(enc[8], enc[9])

        #decoder: each upsampling outputs half of the next block's input width,
        #the other half comes from the skip connection
        self.c11 = self._upsample(enc[9], dec[0])
        self.c12 = self._conv_block(dec[0], dec[1])
        self.c13 = self._conv_block(dec[1], dec[2])
        self.c14 = self._upsample(dec[2], dec[3])
        self.c15 = self._conv_block(dec[3], dec[4])
        self.c16 = self._conv_block(dec[4], dec[5])
        self.c17 = self._upsample(dec[5], dec[6])
        self.c18 = self._conv_block(dec[6], dec[7])
        self.c19 = self._conv_block(dec[7], dec[8])
        self.c20 = self._upsample(dec[8], dec[9])
        self.c21 = self._conv_block(dec[9], dec[10])
        self.c22 = self._conv_block(dec[10], dec[11])

        #final classifying layer
        self.classifier = nn.Conv2d(dec[11], n_class, 1, padding=0)

        #weight initialization of the convolution in every conv block and of the
        #classifier (the transposed convolutions keep their default init)
        conv_block_names = ['c%d' % i for i in range(1, 11)]
        conv_block_names += ['c12', 'c13', 'c15', 'c16', 'c18', 'c19', 'c21', 'c22']
        for block_name in conv_block_names:
            getattr(self, block_name)[0].apply(self.init_weights)
        self.classifier.apply(self.init_weights)

        if cuda: #put the model on the GPU memory
            self.cuda()

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """3x3 convolution (reflect padding) followed by batch norm and ReLU."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1, padding_mode='reflect'),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    @staticmethod
    def _upsample(in_channels, merged_channels):
        """2x2 transposed convolution producing half of `merged_channels` feature maps."""
        return nn.ConvTranspose2d(in_channels, int(merged_channels // 2), kernel_size=2, stride=2)

    def init_weights(self,layer): #Kaiming (He) normal init for the conv layers
        nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')

    def forward(self,input):
        """
        the function called to run inference
        input, tensor (batch, n_channels, rows, cols); rows and cols must be
        divisible by 16 because of the four 2x2 poolings
        returns the class scores, tensor (batch, n_class, rows, cols)
        """
        #encoder
        #level 1
        x1 = self.c2(self.c1(input))
        x2 = self.maxpool(x1)
        #level 2
        x3 = self.c4(self.c3(x2))
        x4 = self.maxpool(x3)
        #level 3
        x5 = self.c6(self.c5(x4))
        x6 = self.maxpool(x5)
        #Level 4
        x7 = self.c8(self.c7(x6))
        x8 = self.maxpool(x7)
        #Level 5
        x9 = self.c10(self.c9(x8))
        #decoder
        #Level 4
        y8 = torch.cat((self.c11(x9),x7),1)
        y7 = self.c13(self.c12(y8))
        #Level 3
        y6 = torch.cat((self.c14(y7),x5),1)
        y5 = self.c16(self.c15(y6))
        #level 2
        y4 = torch.cat((self.c17(y5),x3),1)
        y3 = self.c19(self.c18(y4))
        #level 1
        y2 = torch.cat((self.c20(y3),x1),1)
        y1 = self.c22(self.c21(y2))
        #output
        out = self.classifier(self.dropout(y1))

        return out

In [6]:
def augment(obs, g_t):
    """the data augmentation function, introduces random noise and rotation

    Adds independent clipped Gaussian noise to every pixel of the image tiles
    and rotates tiles and reference data together by a random multiple of
    90 degrees.

    obs, tensor or array (batch, bands, rows, cols), image tiles
    g_t, tensor or array (batch, rows, cols), reference labels
    returns the augmented (obs, g_t) as torch tensors
    """
    sigma, clip = 0.01, 0.03

    obs = np.asarray(obs)
    g_t = np.asarray(g_t)

    # One noise value per element. np.random.randn() without a shape returns a
    # single scalar, which would only shift the whole batch by a constant.
    noise = np.clip(sigma * np.random.randn(*obs.shape), -clip, clip).astype(np.float32)
    obs = obs + noise

    #random rotation 0 90 180 270 degree
    n_turn = np.random.randint(4) #number of 90 degree turns, random int between 0 and 3
    # rotate in the spatial axes only; .copy() removes the negative strides
    # that torch.from_numpy cannot handle
    obs = np.rot90(obs, n_turn, axes=(2, 3)).copy()
    g_t = np.rot90(g_t, n_turn, axes=(1, 2)).copy()

    return torch.from_numpy(obs), torch.from_numpy(g_t)

In [7]:
def train(model, optimizer, args):
    """train for one epoch

    model, the network to train (its parameters decide which device is used)
    optimizer, optimizer built on the model parameters
    args, settings; reads args.batch_size, args.train_subsampler and
          args.class_weights
    returns the mean weighted cross-entropy loss over all batches of the epoch

    Reads the tiles from the module-level `dataset`.
    """
    model.train() #switch the model in training mode

    # run on whatever device the model lives on instead of assuming a GPU
    device = next(model.parameters()).device

    #the loader function will take care of the batching
    loader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, sampler=args.train_subsampler)
    loader = tqdm.tqdm(loader, ncols=500)

    #will keep track of the loss
    loss_meter = tnt.meter.AverageValueMeter()

    # the class weights do not change during the epoch, build the tensor once
    class_weights = torch.tensor(args.class_weights, dtype=torch.float32, device=device)

    for tiles, gt in loader:

        optimizer.zero_grad() #put gradient to zero

        tiles, gt = augment(tiles, gt)

        pred = model(tiles.to(device)) #compute the prediction

        # loss is computed on the model's device, no copy of the prediction to the CPU
        loss = nn.functional.cross_entropy(pred, gt.to(device), weight=class_weights)

        loss.backward() #compute gradients

        # clamp every gradient element to [-1, 1] (value clipping, not norm clipping)
        torch.nn.utils.clip_grad_value_(model.parameters(), 1)

        optimizer.step() #one optimizer step

        loss_meter.add(loss.item())

    return loss_meter.value()[0]

def eval(model, args):
    """eval on test/validation set

    model, the network to evaluate (its parameters decide which device is used)
    args, settings; reads args.test_subsampler
    returns the mean cross-entropy loss over the tiles of the test subset

    Reads the tiles from the module-level `dataset`. The loss is NOT class
    weighted, unlike the loss used in train(), so the two are not directly
    comparable.
    """

    model.eval() #switch in eval mode

    # run on whatever device the model lives on instead of assuming a GPU
    device = next(model.parameters()).device

    loader = torch.utils.data.DataLoader(dataset, batch_size=1, sampler=args.test_subsampler)
    loader = tqdm.tqdm(loader, ncols=500)

    loss_meter = tnt.meter.AverageValueMeter()

    with torch.no_grad(): #no gradients are needed for evaluation
        for tiles, gt in loader:
            pred = model(tiles.to(device))
            loss = nn.functional.cross_entropy(pred, gt.to(device))
            loss_meter.add(loss.item())

    return loss_meter.value()[0]


def train_full(args):
    """The full training loop

    Builds a new UNet, trains it for args.n_epoch epochs and evaluates it on
    the test subset every args.n_epoch_test epochs (0 disables the periodic
    evaluation) and always after the last epoch. Plots and prints the train
    and test losses.

    args, settings; reads n_channel, conv_width, dconv_width, n_class, cuda,
          lr, n_epoch, n_epoch_test (plus what train() and eval() read);
          the last test loss is stored in args.loss_test (None if no
          evaluation was run)
    returns the trained model
    """

    #initialize the model
    model = UNet(args.n_channel, args.conv_width, args.dconv_width, args.n_class, args.cuda)

    print('Total number of parameters: {}'.format(sum([p.numel() for p in model.parameters()])))

    #define the optimizer
    #adam optimizer is always a good guess for classification
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    #learning rate is multiplied by 0.3 after epochs 60, 80 and 95
    #(only takes effect when args.n_epoch is larger than these values)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[60,80,95], gamma=0.3)

    TESTCOLOR = '\033[104m'
    NORMALCOLOR = '\033[0m'

    train_loss = np.empty(args.n_epoch)
    # the evaluated epochs are recorded as they happen, so the number of test
    # losses and their x-positions in the plot always match the evaluations
    test_epochs = []
    test_loss = []
    loss_test = None

    for i_epoch in range(args.n_epoch):
        #train one epoch
        print('Epoch ' + str(i_epoch))
        loss_train = train(model, optimizer, args)
        scheduler.step()
        train_loss[i_epoch] = loss_train

        is_last_epoch = i_epoch == args.n_epoch - 1
        is_test_epoch = args.n_epoch_test != 0 and i_epoch % args.n_epoch_test == 0 and i_epoch > 0
        if is_last_epoch or is_test_epoch:
            #periodic testing
            print(TESTCOLOR)
            print('Evaluation')
            loss_test = eval(model, args)
            print(NORMALCOLOR) #switch the output colour back
            test_epochs.append(i_epoch)
            test_loss.append(loss_test)

    test_loss = np.array(test_loss)

    plt.figure(figsize=(10, 10))
    plt.subplot(1,1,1,ylim=(0,2), xlabel='Epoch #', ylabel='Loss')
    plt.plot(range(args.n_epoch), train_loss)
    plt.plot(test_epochs, test_loss)
    plt.show()
    print(train_loss)
    print(test_loss)
    args.loss_test = loss_test

    return model

### Rady pro použití 2
1. Změň args.n_class - počet klasifikačních tříd
2. Změň args.class_weights tak, aby jejich součet zůstal 1 (váha tříd při tréninku; méně zastoupené třídy lze klasifikovat lépe díky vyšší hodnotě této proměnné)
3. Experimentuj s různými hodnotami args.n_epoch (počet trénovacích epoch), args.lr (learning rate, rychlost, se kterou se síť učí) a args.batch_size (počet snímků v minibatchi pro trénink, výrazně omezeno pamětí grafické karty)
4. Hodnotami na konci proměnných args.conv_width a args.dconv_width (druhé parametry funkce np.divide()) lze změnit "velikost sítě" - počet konvolučních filtrů v každé vrstvě sítě... 1 znamená U-Net v originální podobě (Ronneberger et al. 2015), doporučuji začít s hodnotou 2 (poloviční síť oproti původní) a případně snížit hodnotu, kdyby se ukázalo, že se model nezvládá naučit prostorové vztahy ve snímku

In [8]:
args = mock.Mock() #stores the parameters
args.n_epoch = 50 #number of training epochs
args.n_epoch_test = int(5) #periodicity of evaluation on test set
args.batch_size = 1 #number of tiles in a training minibatch
args.n_class = 3 #number of classification classes
#number of input bands: CIR and RGB 3 each, PAN 1, MHS use_mhs[1]
args.n_channel = use_cir*3 + use_rgb*3 + use_pan + use_mhs[0]*use_mhs[1]
#feature map widths of the original U-Net divided by the last argument of np.divide
#(np.int was removed from NumPy, the builtin int is the same type)
args.conv_width =  np.divide([64,64,128,128,256,256,512,512,1024,1024],        4).astype(int)
args.dconv_width = np.divide([1024,512,512,512,256,256,256,128,128,128,64,64], 4).astype(int)
args.class_weights = [0.1, 0.1, 0.8] #per-class weights of the training loss
args.cuda = True #train on the GPU
args.lr = 5e-4 #learning rate
args.crossval_nfolds = 3 #number of cross-validation folds
args.model_save_folder = os.path.join(dataset_path, 'models')
#shuffled folds; no random_state is set, so the split differs between runs
kfold = KFold(n_splits = args.crossval_nfolds, shuffle=True)

In [9]:
# Model training itself, using crossvalidation
# One model is trained per fold and saved as '<model_name>_fold_<fold>.pt'
os.makedirs(args.model_save_folder, exist_ok=True)  # make sure the model folder exists

model_results = {}  # fold index -> test loss after the last epoch
for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
    args.train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    args.test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)

    print(train_ids.shape)
    print(test_ids.shape)
    fold_start = time()
    trained_model = train_full(args)
    model_results[fold] = args.loss_test

    print('Saving the model to:')
    state_dict_path = os.path.join(args.model_save_folder, f'{model_name}_fold_{str(fold)}.pt')
    print(state_dict_path)
    torch.save(trained_model.state_dict(), state_dict_path)
    print(f'Training finished in {str(time()-fold_start)}s')

print('\n')
print(f'Resulting loss for individual folds: \n{model_results}')
# np.mean cannot average a dict, average its values instead
print(f'Mean loss across all folds: \n{np.mean(list(model_results.values()))}')

(150,)
(75,)
Total number of parameters: 1944515
Epoch 0


  0%|                                                                                                         …

Epoch 1


  0%|                                                                                                         …

Epoch 2


  0%|                                                                                                         …

KeyboardInterrupt: 

## Loading a trained model
Change state_dict_path to the trained model you want to use

In [None]:
# Path to the state_dictionary
# Uses the file name pattern of the training cell ('<model_name>_fold_<fold>.pt');
# change the fold number or the whole path to load a different model
state_dict_path = os.path.join(dataset_path, 'models', f'{model_name}_fold_0.pt')

# Parameters for model definition (must match the ones used for training)
args = mock.Mock() #stores the parameters

args.n_class = 3
# number of input bands, derived from the data settings like in the training cell
args.n_channel = use_cir*3 + use_rgb*3 + use_pan + use_mhs[0]*use_mhs[1]
# np.int was removed from NumPy, the builtin int is the same type
args.conv_width =  np.divide([64,64,128,128,256,256,512,512,1024,1024],        4).astype(int)
args.dconv_width = np.divide([1024,512,512,512,256,256,256,128,128,128,64,64], 4).astype(int)
args.cuda = True

# Load a trained model state_dictionary
model = UNet(args.n_channel, args.conv_width, args.dconv_width, args.n_class, args.cuda)
# map_location loads the weights onto the model's device, so a model saved on
# the GPU can also be loaded when args.cuda is False
model.load_state_dict(torch.load(state_dict_path, map_location=next(model.parameters()).device))
model.eval()

## Computing accuracy metrics
Computes precision, recall and f1-score for each class as well as overall accuracy and mean f1-score

In [None]:
def classify(model, args):
    """Classify every tile of the module-level `dataset`.

    model, the trained network (its parameters decide which device is used)
    args, unused, kept for a signature consistent with eval()
    returns an int64 array (n_tiles, rows, cols) with the predicted class of
    every pixel, in dataset order
    """

    model.eval() #switch in eval mode
    device = next(model.parameters()).device

    loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, drop_last=False)
    loader = tqdm.tqdm(loader, ncols=500)

    # predictions are collected per tile, so the function does not depend on
    # any array defined outside of it
    classified = []

    with torch.no_grad():
        for tiles, gt in loader:
            pred = model(tiles.to(device))
            # batch size is 1: take the only item and pick the best class per pixel
            classified.append(pred[0].argmax(0).cpu().numpy())

    return np.stack(classified, axis=0)

In [None]:
# Number of pixels belonging to each class in the reference dataset

# y_t: reference labels as a tensor; y_t_flat: the same labels as a flat numpy
# array, used by the accuracy metrics below
y_t = data_labels
y_t_flat = y_t.detach().numpy().flatten()

unique, counts = np.unique(y_t_flat, return_counts=True)
print(unique)
print(counts)

In [None]:
# Number of pixels belonging to each class in the classified dataset

# Classify all tiles and measure how long the inference takes
a = time()
Y_t = classify(model, args)
b = time()
print('Inferrence finished in ' + str(b-a) + ' s')

# Flat array of predicted labels, aligned element by element with y_t_flat
Y_t_flat = Y_t.flatten()

unique, counts = np.unique(Y_t_flat, return_counts=True)
print(unique)
print(counts)

In [None]:
# Accuracy metrics

# Per-class precision, recall, F1-score and support (reference = y_t_flat, prediction = Y_t_flat)
precisions, recalls, f1_scores, supports = precision_recall_fscore_support(y_t_flat, Y_t_flat)
overall_accuracy = accuracy_score(y_t_flat, Y_t_flat)
# Unweighted mean of the per-class F1-scores
mean_f1_score = sum(f1_scores)/len(f1_scores)

print('precisions [%]:      ', precisions*100)
print('recalls    [%]:      ', recalls*100)
print('f1_scores  [%]:      ', f1_scores*100)
print('')
print('overall accuracy: {:.2%}'.format(overall_accuracy))
print('mean f1 score:    {:.2%}'.format(mean_f1_score))

## Export results
Results are not georeferenced – use Georeference_results_gdal.ipynb for georeferencing and combining into a single raster
Change source_path to the original root directory

In [None]:
# Root folder with the tiles to classify; the classified tiles are written to its 'results' sub-folder
source_path = 'E:\\datasets\\test_unet\\Krkonose2012\\overlap'
results_path = os.path.join(source_path, 'results')

In [None]:
# Load images to classify
# gt=False: only the image features are read, no reference data is needed
in_features = read_patch(source_path, use_cir, use_rgb, use_pan, use_mhs, gt=False)
print(in_features.shape)

In [None]:
# Create files for classified tiles
# The first enabled input folder is copied into the results folder; the copied
# files provide the file names that classify_and_export iterates over.
# NOTE(review): copy_tree comes from distutils, which is deprecated - confirm it
# is available in the Python version used.
if use_rgb:
    copy_tree(os.path.join(source_path, 'RGB'), results_path, update=1)
elif use_cir:
    copy_tree(os.path.join(source_path, 'CIR'), results_path, update=1)
elif use_pan:
    copy_tree(os.path.join(source_path, 'PAN'), results_path, update=1)
elif use_mhs[0]:
    copy_tree(os.path.join(source_path, 'MHS'), results_path, update=1)
else:
    print('no input files')

In [None]:
def classify_and_export(model_b, in_features_b, results_path_b):
    """Classify the tiles one by one and write each result as an image.

    The i-th file name found in `results_path_b` (sorted) is overwritten with
    the classification of `in_features_b[i]`.

    model_b, the trained network (its parameters decide which device is used)
    in_features_b, tensor (n_tiles, bands, rows, cols) of the tiles to classify;
        assumed to be ordered like the sorted file names - verify against how
        the features were read
    results_path_b, folder holding one file per tile; its file names are
        reused for the outputs
    """
    first_param = next(model_b.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    model_b.eval() #inference only: disable dropout, use the batch norm statistics

    with torch.no_grad(): #no gradients are needed, saves memory
        for i, patch in enumerate(sorted(os.listdir(results_path_b))):
            in_patch = in_features_b[i, :, :, :]
            pred = model_b(in_patch[None, :, :, :].to(device)).cpu().numpy()
            # best class per pixel of the only item in the batch
            pred = pred[0, :, :, :].argmax(0).squeeze()

            # os.path.join adds the path separator that plain string
            # concatenation was missing
            imageio.imwrite(os.path.join(results_path_b, patch), pred.astype(np.uint8))

In [None]:
# Classify all loaded tiles with the loaded model and write them into the results folder
classify_and_export(model, in_features, results_path)