In [40]:
#import json
import pandas as pd
import geopandas as gpd
#import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import os
from sys import path
#import time
#from tqdm import tqdm
import glob
import rasterio

%matplotlib inline
os.getcwd()

ModuleNotFoundError: No module named 'geopandas'

# Automated Sentinel-1 Ice, Water, Land Segmentation Challenge



This notebook is an adjusted template to help guide you through the training process. Feel free to use as little or as much of it as you like.

For the purposes of the template, we will assume a *classification* approach, which involves sub-sampling small images from the Sentinel-1 images. There will be notes where code should be adjusted for a *segmentation* approach.

### Dataset preparation - (1) sub-sampling

Sample patches from each TIF image, and find the corresponding label using the Shapefiles. Save each image with a unique ID in the directory **SAMPLING_DIR**. Save the corresponding metadata in the following format (this could be a CSV file, NumPy array, or some other format) in the directory **META_DIR**:


```
image_id, x, y, label
```

Set the label value as one of "L", "W", "I" as specified in the Shapefiles.

To make it easier to patch the final segmentation back together, it is suggested to use the (x, y) pixel coordinates of the patch, rather than the spatial coordinates.

In [27]:
# Working directory the notebook was started from; all data paths hang off it.
curdir = os.getcwd()

# os.path.join inserts the right separator for the platform and avoids
# backslash escapes such as '\s' and '\m', which are invalid escape sequences.
SAMPLING_DIR = os.path.join(curdir, 'sampling_dir')  # sampled image patches
META_DIR = os.path.join(curdir, 'meta_dir')          # patch metadata
print(SAMPLING_DIR, META_DIR)

C:\Users\s2126908\OneDrive\Oceanography Coursework\PhD\Leeds_training\w2\2021.02.25.hackathon_ati\sampling_dir C:\Users\s2126908\OneDrive\Oceanography Coursework\PhD\Leeds_training\w2\2021.02.25.hackathon_ati\meta_dir


Some helpful code: reading in a single Sentinel-1 image and the corresponding Shapefile.

In [38]:
# The directory containing all shapefiles - i.e., the parent of Sea_Ice/.
# Paths are built with os.path.join so no backslash escapes are needed.
SHAPEFILE_DIR = os.path.join(curdir, 'EE_Polar_Training_Dataset_v-1-0-0')

# Full path of the .shp file
shapefile = os.path.join(SHAPEFILE_DIR, 'Sea_Ice', 'seaice_s1_20180116t075430.shp')
print(shapefile)

# Extract the shape ID from the file name, for example 20180116T075430:
# drop the directory and the extension, then keep the text after the last "_".
shp_stem = os.path.splitext(os.path.basename(shapefile))[0]
shp_id = shp_stem.split("_")[-1].upper()
print(shp_id)

# Locate the corresponding Sentinel-1 image based on the ID, e.g.
# S1A_EW_GRDM_1SDH_20180116T075430_..._Orb_Cal_Spk_TC_rgb_8bit.tif
tiff_pattern = os.path.join(curdir, 'Sentinel geotiffs', 'S1?_*_' + shp_id + '*.tif')
tiff_matches = sorted(glob.glob(tiff_pattern))

# The pattern should match exactly one image; fail loudly otherwise instead
# of carrying a stale or ambiguous value into the following cells.
if len(tiff_matches) != 1:
    raise FileNotFoundError(
        f"Expected exactly one GeoTIFF matching {tiff_pattern!r}, "
        f"found {len(tiff_matches)}"
    )

# Full path of the single matching GeoTIFF
tiff_file = tiff_matches[0]
print(tiff_file)

C:\Users\s2126908\OneDrive\Oceanography Coursework\PhD\Leeds_training\w2\2021.02.25.hackathon_ati\EE_Polar_Training_Dataset_v-1-0-0\Sea_Ice\seaice_s1_20180116t075430.shp
20180116T075430
C:\Users\s2126908\OneDrive\Oceanography Coursework\PhD\Leeds_training\w2\2021.02.25.hackathon_ati\Sentinel geotiffs\S1A_EW_GRDM_1SDH_20180116T075430_20180116T075530_020177_0226B9_9FE3_Orb_Cal_Spk_TC_rgb_8bit.tif


Feel free to use other Python packages; but as an example, here we use **geopandas** to read in the Shapefile, and **rasterio** to read the GeoTIFF.

In [39]:
# `shapefile` already holds the full path to the .shp file, so it is passed
# as-is; prefixing SHAPEFILE_DIR again would duplicate the directory part.
shape_data = gpd.read_file(shapefile)

shape_data.head()

NameError: name 'gpd' is not defined

In [None]:
# directory containing all GeoTIFF files
TIFF_DIR = os.path.join(curdir, 'Sentinel geotiffs')

# `tiff_file` may hold a list of glob matches or a single path; use the first
# match in the former case.
tiff_path = tiff_file[0] if isinstance(tiff_file, (list, tuple)) else tiff_file

# Only prefix TIFF_DIR when given a bare file name; an absolute path (as
# returned by glob on an absolute pattern) is used unchanged.
if not os.path.isabs(tiff_path):
    tiff_path = os.path.join(TIFF_DIR, tiff_path)

tif_img = rasterio.open(tiff_path)

The shapes in the Shapefiles are **shapely** objects. We can also use the Python package **shapely** to check whether an x, y pixel coordinate position is in a given polyshape.

In [None]:
from shapely.geometry import Point

# Example (x, y) position to test
x, y = 4000, 8000
point = Point(x, y)

# for example, specify the shape in the Shapefile
shape_id = 2

# Look the polygon up once, then report its class only if it contains the point
polygon = shape_data['geometry'][shape_id]
if polygon.contains(point):
    poly_class = shape_data['poly_type'][shape_id]
    print("Point", point, "is in shape", shape_id, "and has class", poly_class)

Define a train/validation ratio. Patches and metadata saved from the test TIF images should be stored in separate directories.

In [3]:
# Fraction of the samples assigned to the training split
TRAIN_SIZE = 0.7

# The validation split takes the remainder: valid size = 1.0 - TRAIN_SIZE

Map the class category characters to integers.

In [4]:
# Integer class index for each label character used in the Shapefiles
# (presumably L = land, W = water, I = ice). The position in the tuple is
# the class index.
LABELS = {code: index for index, code in enumerate(("L", "W", "I"))}

The following is a Dataset class which reads in image data saved in the format described above.

In [7]:
from torch.utils.data import Dataset
from torchvision import transforms

from PIL import Image


class PolarPatch(Dataset):
    """Dataset of image patches described by rows of metadata.

    Each metadata row is expected to be ``(image_id, x, y, label)``, where
    ``label`` is one of the keys of ``LABELS``. The first
    ``int(TRAIN_SIZE * N)`` rows form the training split and the remaining
    rows form the validation split.

    Args:
        transform: optional callable applied to each loaded image.
        split: ``"train"`` or ``"val"``.
        meta: optional sequence of metadata rows. When omitted, the
            placeholder below is used (empty until the loading TODO is done).
    """

    def __init__(self, transform=None, split="train", meta=None):
        super().__init__()

        assert split in ["train", "val"], "split must be 'train' or 'val'"

        if meta is None:
            # TODO: load in meta data as N rows of (image_id, x, y, label)
            meta = []

        train_dim = int(TRAIN_SIZE * len(meta))

        if split == "train":
            meta = meta[:train_dim]
        else:
            meta = meta[train_dim:]

        # Keep the IDs stored in the metadata. Renumbering the rows after the
        # split would make the validation set read images 0..M-1, i.e. the
        # training images.
        self.images = [row[0] for row in meta]
        self.coords = [(row[1], row[2]) for row in meta]

        # Targets in integer form for computing cross entropy
        self.targets = [LABELS[row[3]] for row in meta]
        self.transform = transform

    def __len__(self):
        """Return the number of samples in this split."""
        return len(self.targets)

    def __getitem__(self, index):
        """Return ``(image, target, (x, y))`` for the sample at ``index``."""
        # Join with a path separator so the file is looked up inside
        # SAMPLING_DIR. Change the file extension here if needed.
        image_path = os.path.join(SAMPLING_DIR, str(self.images[index]) + ".png")
        x = Image.open(image_path)
        y = self.targets[index]
        coord = self.coords[index]

        if self.transform:
            x = self.transform(x)

        return x, y, coord

An example data transform

In [8]:
# Preprocessing steps, composed in list order.
# TODO: add whatever else you need - normalisation, augmentation, etc.
transform_steps = [
    transforms.ToTensor(),
]
data_transform = transforms.Compose(transform_steps)

### Dataset preparation - (2) data loaders

Now we can prepare the data loaders. Here is the example for the training set; you will also need the validation and test set.

In [12]:
import torch

# TODO set this value based on your working environment
BATCH_SIZE = 128

# DataLoader worker processes on Windows are started with "spawn" and have to
# re-import the dataset class. A class defined in a notebook cell cannot be
# re-imported that way, so load in the main process there.
NUM_WORKERS = 0 if os.name == "nt" else 2

train_set = PolarPatch(
    split='train',
    transform=data_transform,
)

train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
)

### Model

You can use a custom model architecture, or copy one from literature. It is recommended to not build too deep of a network for the sake of training time.

In [13]:
import torch.nn as nn


class PolarNet(nn.Module):
    """Minimal example network: a convolutional stem followed by a linear head.

    Args:
        n_classes: number of output scores produced per input image.
    """

    def __init__(self, n_classes=3):
        super().__init__()

        # TODO: build your own architecture here; one conv layer and ReLU here as an example only
        conv_layers = [
            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
        ]
        self.features = nn.Sequential(*conv_layers)

        # TODO: continue classifier section of architecture here for classification approach;
        # otherwise, remove and add in upscaling for a fully-convolutional segmentation approach.
        # 4096 input features = 64 channels x 8 x 8, e.g. a 16x16 patch after the stride-2 conv.
        head_layers = [
            nn.Linear(4096, n_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return class scores of shape (batch, n_classes) for a batch of images."""
        # as an example; alter as needed depending on your architecture
        feature_maps = self.features(x)

        # Collapse everything except the batch dimension before the linear head
        flat = feature_maps.flatten(start_dim=1)
        return self.classifier(flat)

### Training

An example of loading the model, setting a loss criterion and defining an optimizer.

In [18]:
# Device configuration - use the first CUDA GPU when one is available,
# otherwise fall back to the CPU
if torch.cuda.is_available():
    DEVICE = torch.device('cuda:0')
else:
    DEVICE = torch.device('cpu')

In [20]:
from torch import optim

# Build the network, move it to the selected device, and use cross-entropy
# as the training loss
model = PolarNet()
model = model.to(DEVICE)
criterion = nn.CrossEntropyLoss()

# Stochastic gradient descent - TODO: alter as needed
sgd_settings = dict(
    lr=0.001,
    weight_decay=0.0005,
    momentum=0.9,
)
optimizer = optim.SGD(model.parameters(), **sgd_settings)

Train the model, batch by batch, for as many iterations as required to converge. You can use the validation set to determine automatically when to stop training.

### Evaluation

Evaluate patch-based accuracy on the test set; then using the test patch coordinates, piece together the segmentation prediction on the original TIF images.