In [None]:
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils


import os
import numpy as np
from glob import glob
import sklearn
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

In [None]:
def get_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    if torch.cuda.is_available():
        return torch.device('cuda')
    return torch.device('cpu')


# Device shared by the rest of the notebook
device = get_device()

In [None]:
class PASTISSegmentation(Dataset):
    """
    Segmentation dataset backed by ``.npy`` files.

    Here we use a subset of the PASTIS dataset: https://github.com/VSainteuf/pastis-benchmark

    Images are discovered as ``<image_dir>/<split>/S2_<name>.npy`` and each one is paired
    with ``<annotation_dir>/<split>/TARGET_<name>.npy``.

    Args:
        image_dir: Root folder containing one sub-folder of images per split.
        annotation_dir: Root folder containing one sub-folder of annotations per split.
        split: Name of the sub-folder to read (e.g. "train", "val", "test").
        median_of_days: If True, every image is reduced with a median over its first axis.
        Xmean: Optional mean used for normalization; must broadcast against the image array.
        Xstd: Optional standard deviation used for normalization; must broadcast likewise.
        binary_labels: If True, every label > 0 becomes 1 and labels are returned as float32.
        normalize: If True (and both ``Xmean`` and ``Xstd`` are given) images are
            standardized as ``(x - Xmean) / Xstd``. If False, raw values are returned.
        transform: Optional callable applied to the sample dict before it is returned.
        device: Accepted for interface compatibility only; samples are always created
            as CPU tensors.
    """
    def __init__(
        self,
        image_dir: str,
        annotation_dir: str,
        split:str = "train",
        median_of_days: bool = False,
        Xmean = None,
        Xstd = None,
        binary_labels: bool = False,
        normalize:bool = True,
        transform = None,
        device = 'cpu'
    ) -> None:
        self.split = split
        self.transform = transform
        # glob returns files in arbitrary order; sort so that a given index always
        # refers to the same file across runs and machines.
        self.images = sorted(glob(os.path.join(image_dir, split, 'S2_*.npy')))

        # Derive the matching annotation path from every image file name.
        annotations = []
        for im in self.images:
            name = os.path.splitext(os.path.basename(im))[0].replace("S2_", "")
            annotations.append(os.path.join(annotation_dir, split, f"TARGET_{name}.npy"))
        self.annotations = annotations

        # Store in the class for future reference
        self.median_of_days = median_of_days
        self.binary_labels = binary_labels
        self.normalize = normalize

        self.Xmean = Xmean
        self.Xstd = Xstd

    def __len__(self):
        """Number of images found for the selected split."""
        return len(self.images)

    def read_data(self, files, median, norm=True):
        """
        Reads and stacks our data

        Args:
            files: Iterable of ``.npy`` paths to load.
            median: If True, reduce each array with a median over axis 0
                (the per-day axis of the image time series).
            norm: If True and both ``Xmean`` and ``Xstd`` are set, standardize each array.

        Returns:
            A numpy array stacking the loaded arrays along a new leading axis.
        """
        t = []
        for im in files:
            r = np.load(im)
            if median:
                r = np.median(r, axis=0)  # Take median value across the days
            if norm and self.Xmean is not None and self.Xstd is not None:
                r = (r - self.Xmean) / self.Xstd
            t.append(r)
        return np.stack(t, axis=0)

    def __getitem__(self, idx):
        """
        Load one sample.

        Returns:
            Dict with 'X' (float32 tensor holding the image) and 'y' (the first channel of
            the annotation file; int64 tensor, or float32 tensor when ``binary_labels``
            is set). If a ``transform`` was given, its return value is returned instead.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        # Honor the `normalize` flag given at construction time.
        X = self.read_data([self.images[idx]], self.median_of_days, norm=self.normalize)[0]
        # Annotations are never normalized; only their first channel is used as the label map.
        y = (self.read_data([self.annotations[idx]], median=False, norm=False)[0, 0]).astype(np.int32)

        if self.binary_labels:
            y[y > 0] = 1  # Convert to binary labels
            label = torch.as_tensor(y, dtype=torch.float32)
        else:
            label = torch.as_tensor(y, dtype=torch.int64)

        sample = {'X': torch.as_tensor(X, dtype=torch.float32), 'y': label}
        if self.transform:
            sample = self.transform(sample)

        return sample

In [None]:
# Root folder of the PASTIS subset. Set the PASTIS_BASE_PATH environment variable to
# point at your own copy instead of editing the notebook.
base_path = os.environ.get("PASTIS_BASE_PATH", "/home/ankit/Vision/Pastis/")

# Per-band mean / standard deviation used to standardize the 10 input bands.
# Reshaped to (10, 1, 1) so that they broadcast over the two trailing image axes.
Xmean = np.array([ 596.57817383, 878.493514, 969.89764811, 1324.39628906, 2368.21767578, 2715.68257243, 2886.70323486, 2977.03915609, 2158.25386556, 1462.10965169])
Xmean = Xmean.reshape((10, 1, 1))
Xstd = np.array([251.33337853, 289.95055489, 438.725014, 398.7289996, 706.53781626, 832.72503267, 898.14189979, 909.04165075, 661.66078257, 529.15340992])
Xstd = Xstd.reshape((10, 1, 1))

image_root = os.path.join(base_path, "data", "images")
annotation_root = os.path.join(base_path, "data", "annotations")

# Options shared by every split, kept in one place so the splits cannot drift apart.
dataset_options = dict(
    median_of_days=True,
    Xmean=Xmean,
    Xstd=Xstd,
    binary_labels=False,
    transform=None,
)

p_train = PASTISSegmentation(image_root, annotation_root, split="train", **dataset_options)
p_val = PASTISSegmentation(image_root, annotation_root, split="val", **dataset_options)
p_test = PASTISSegmentation(image_root, annotation_root, split="test", **dataset_options)

In [None]:
# Wrap every dataset split in a DataLoader so that we can iterate over
# mini-batches instead of single samples.

# Adjust the batch size to fit the VRAM of the GPU. If it has to be small,
# compensate by increasing grad_accumulation in the training step.
BATCH_SIZE = 4

# Number of loader worker processes; change depending on the available hardware.
num_workers = 8

loader_options = dict(batch_size=BATCH_SIZE, num_workers=num_workers)

# Only the training split is shuffled.
train_dataloader = DataLoader(p_train, shuffle=True, **loader_options)
val_dataloader = DataLoader(p_val, shuffle=False, **loader_options)
test_dataloader = DataLoader(p_test, shuffle=False, **loader_options)


In [None]:
# Sanity check: print the shapes and dtypes of the first training batch only.
for d in train_dataloader:
    inputs, targets = d['X'], d['y']
    print(inputs.shape, targets.shape)
    print(inputs.dtype, targets.dtype)
    break

In [None]:
imd = 5   # Index of the training sample to visualize
band = 3  # Zero-based index of the image band to display

# Load the sample once; every p_train[...] access re-reads the files from disk.
sample = p_train[imd]

# Show the selected band of the selected image
plt.imshow(sample['X'][band].cpu().numpy())
plt.title(f"Training sample {imd} - band index {band}")
plt.show()

# Show the labels of the same image
plt.imshow(sample['y'].cpu().numpy())
plt.colorbar()
plt.title(f"Training sample {imd} - labels")
plt.show()

# Neural Network

In [None]:
# Define our neural network

# Modified from: https://github.com/milesial/Pytorch-UNet
# GPL-3 license

class DoubleConv(nn.Module):
    """Two consecutive (3x3 convolution => BatchNorm => ELU) stages.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by the second stage.
        mid_channels: Channels between the two stages; falls back to
            ``out_channels`` when not given (or falsy).
    """

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        hidden = mid_channels if mid_channels else out_channels

        layers = []
        for stage_in, stage_out in ((in_channels, hidden), (hidden, out_channels)):
            # The convolution bias is disabled because BatchNorm follows directly.
            layers.append(nn.Conv2d(stage_in, stage_out, kernel_size=3, padding=1, bias=False))
            layers.append(nn.BatchNorm2d(stage_out))
            layers.append(nn.ELU(inplace=True))
        self.double_conv = nn.Sequential(*layers)

    def forward(self, x):
        """Apply both stages; padding=1 keeps the spatial size unchanged."""
        out = self.double_conv(x)
        return out


class Down(nn.Module):
    """Encoder step: 2x2 max-pooling followed by a DoubleConv."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        pooling = nn.MaxPool2d(2)
        convolutions = DoubleConv(in_channels, out_channels)
        self.maxpool_conv = nn.Sequential(pooling, convolutions)

    def forward(self, x):
        """Downsample ``x`` and convolve the result."""
        out = self.maxpool_conv(x)
        return out


class Up(nn.Module):
    """Decoder step: upsample, concatenate with the skip connection, then DoubleConv.

    Args:
        in_channels: Channels of the concatenated (skip + upsampled) tensor.
        out_channels: Channels produced by the step.
        bilinear: Use parameter-free bilinear upsampling when True, otherwise a
            learned transposed convolution that also halves the channel count.
    """

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        half = in_channels // 2
        if bilinear:
            # Bilinear upsampling keeps the channel count, so the channel
            # reduction is left to the convolutions (mid_channels = half).
            upsampler = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            fuse = DoubleConv(in_channels, out_channels, half)
        else:
            upsampler = nn.ConvTranspose2d(in_channels, half, kernel_size=2, stride=2)
            fuse = DoubleConv(in_channels, out_channels)
        self.up = upsampler
        self.conv = fuse

    def forward(self, x1, x2):
        """Upsample ``x1``, pad it to the spatial size of ``x2`` and fuse both.

        ``x2`` is the skip connection; it comes first in the channel concatenation.
        """
        upsampled = self.up(x1)

        # Axes 2 and 3 are treated as height and width.
        gap_h = x2.shape[2] - upsampled.shape[2]
        gap_w = x2.shape[3] - upsampled.shape[3]
        pad_left, pad_top = gap_w // 2, gap_h // 2

        # F.pad expects the padding of the last axis first: (left, right, top, bottom).
        upsampled = F.pad(upsampled, [pad_left, gap_w - pad_left, pad_top, gap_h - pad_top])
        # if you have padding issues, see
        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
        merged = torch.cat((x2, upsampled), dim=1)
        return self.conv(merged)


class OutConv(nn.Module):
    """Final 1x1 convolution mapping feature channels to output channels."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        """Project every pixel's feature vector to ``out_channels`` values."""
        projected = self.conv(x)
        return projected
    
class UNet(nn.Module):
    """U-Net: a four-level encoder/decoder with skip connections.

    Args:
        n_channels: Number of channels of the input image.
        n_classes: Number of output channels (one logit map per class).
        bilinear: Passed on to the ``Up`` blocks to select bilinear upsampling
            instead of transposed convolutions.

    The forward pass returns raw logits; no softmax/sigmoid is applied.
    """

    # Names of the sub-modules in the order in which forward() applies them.
    _STAGES = ("inc", "down1", "down2", "down3", "down4",
               "up1", "up2", "up3", "up4", "outc")

    def __init__(self, n_channels, n_classes, bilinear=False):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear
        # Switched on by use_checkpointing(); plain attribute, not part of the state_dict.
        self._checkpointing = False

        self.inc = (DoubleConv(n_channels, 64))
        self.down1 = (Down(64, 128))
        self.down2 = (Down(128, 256))
        self.down3 = (Down(256, 512))
        # With bilinear upsampling the Up blocks do not halve the channels themselves,
        # so the narrower widths are produced by the preceding blocks instead.
        factor = 2 if bilinear else 1
        self.down4 = (Down(512, 1024 // factor))
        self.up1 = (Up(1024, 512 // factor, bilinear))
        self.up2 = (Up(512, 256 // factor, bilinear))
        self.up3 = (Up(256, 128 // factor, bilinear))
        self.up4 = (Up(128, 64, bilinear))
        self.outc = (OutConv(64, n_classes))

    def _run(self, stage, *inputs):
        """Call ``stage`` directly, or through activation checkpointing when enabled."""
        # getattr keeps instances working that were created before the flag existed.
        if getattr(self, "_checkpointing", False) and torch.is_grad_enabled():
            from torch.utils.checkpoint import checkpoint
            # Non-reentrant mode is required here: the network input usually does not
            # require grad, which the reentrant implementation cannot handle.
            return checkpoint(stage, *inputs, use_reentrant=False)
        return stage(*inputs)

    def forward(self, x):
        """Return the logits for ``x``."""
        # Encoder: keep every intermediate result for the skip connections.
        x1 = self._run(self.inc, x)
        x2 = self._run(self.down1, x1)
        x3 = self._run(self.down2, x2)
        x4 = self._run(self.down3, x3)
        x5 = self._run(self.down4, x4)
        # Decoder: each step fuses the upsampled features with the matching encoder output.
        x = self._run(self.up1, x5, x4)
        x = self._run(self.up2, x, x3)
        x = self._run(self.up3, x, x2)
        x = self._run(self.up4, x, x1)
        logits = self._run(self.outc, x)
        return logits

    def use_checkpointing(self):
        """Enable activation checkpointing for every stage of the network.

        Activations inside each stage are then recomputed during the backward pass
        instead of being stored, trading compute for memory. Parameters and
        ``state_dict`` keys are unchanged.

        The previous implementation called ``torch.utils.checkpoint(...)``, which is a
        module rather than a function, and therefore always raised.

        NOTE(review): recomputation re-runs each stage in training mode, so layers with
        running statistics (BatchNorm) update them twice per step - confirm this is
        acceptable before enabling it for a final training run.
        """
        self._checkpointing = True

In [None]:
# Define the evaluation loop
def eval_loop(model, val_loader, criterion):
    """Compute the mean loss of ``model`` over ``val_loader`` without updating it.

    Args:
        model: Network to evaluate.
        val_loader: Iterable of batches; each batch is a dict with keys 'X' and 'y'.
        criterion: Callable ``criterion(prediction, target)`` returning a scalar loss.

    Returns:
        0-dim float tensor holding the unweighted mean of the per-batch losses
        (NaN when ``val_loader`` yields no batches).

    The model is switched to eval mode for the duration of the call and is put back
    into whatever mode (train/eval) it was in before.
    """
    # Run on the device the model lives on rather than on a notebook-global variable.
    try:
        run_device = next(model.parameters()).device
    except StopIteration:
        # Model without parameters: fall back to the default device choice.
        run_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    was_training = model.training
    epoch_loss_val = []
    model.eval()
    try:
        with torch.no_grad():
            for batch in val_loader:
                x = batch['X'].to(dtype=torch.float32, device=run_device)
                y_true = batch['y'].to(device=run_device)
                # Drops axis 1 of the target only if it has size 1; otherwise a no-op.
                y_true = torch.squeeze(y_true, dim=1)
                y_pred = model(x)

                ### Calculate loss
                loss = criterion(y_pred, y_true)
                epoch_loss_val.append(loss.item())
    finally:
        # Restore the caller's mode even if a batch raised.
        model.train(was_training)

    return torch.mean(torch.FloatTensor(epoch_loss_val))

# Define the training loop
def train_loop(model, train_loader, val_loader, optimizer, criterion, epochs=50, grad_accumulation=1):
    """Train ``model`` and evaluate it on ``val_loader`` after every epoch.

    Args:
        model: Network to train (updated in place).
        train_loader: Sized iterable of batches; each batch is a dict with keys 'X' and 'y'.
        val_loader: Loader passed to ``eval_loop`` after every epoch.
        optimizer: Optimizer holding the parameters of ``model``.
        criterion: Callable ``criterion(prediction, target)`` returning a scalar loss.
        epochs: Number of passes over ``train_loader``.
        grad_accumulation: Number of consecutive batches whose gradients are combined
            into a single optimizer step. Must be >= 1.

    Returns:
        Tuple ``(model, train_loss, val_loss)`` where the two lists hold one 0-dim
        tensor (mean loss) per epoch.

    Raises:
        ValueError: If ``grad_accumulation`` is smaller than 1.
    """
    if grad_accumulation < 1:
        raise ValueError(f"grad_accumulation must be >= 1, got {grad_accumulation}")

    # Run on the device the model lives on rather than on a notebook-global variable.
    try:
        run_device = next(model.parameters()).device
    except StopIteration:
        run_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_loss = []
    val_loss = []
    n_train = len(train_loader)

    # Discard gradients left over from any earlier backward pass.
    optimizer.zero_grad()

    for e in range(epochs):
        epoch_loss_train = []
        model.train()
        for batch_idx, batch in enumerate(train_loader):
            print(f"Training on Batch: {batch_idx}")
            x = batch['X'].to(dtype=torch.float32, device=run_device)
            y_true = batch['y'].to(device=run_device)
            # Drops axis 1 of the target only if it has size 1; otherwise a no-op.
            y_true = torch.squeeze(y_true, dim=1)
            y_pred = model(x)

            ### Calculate loss
            loss = criterion(y_pred, y_true)

            # Average (instead of sum) the gradients of one accumulation group so that
            # the step matches a single larger batch. The last group of an epoch may
            # contain fewer than `grad_accumulation` batches.
            group_start = (batch_idx // grad_accumulation) * grad_accumulation
            group_size = min(grad_accumulation, n_train - group_start)
            (loss / group_size).backward()

            # Step at the end of every group and after the final batch of the epoch.
            if (batch_idx + 1) % grad_accumulation == 0 or (batch_idx + 1 == n_train):
                optimizer.step()
                optimizer.zero_grad()

            # Log the unscaled loss so the curves do not depend on grad_accumulation.
            epoch_loss_train.append(loss.item())

        el = torch.mean(torch.FloatTensor(epoch_loss_train))
        print(f"Train loss for epoch {e}: {el}")
        train_loss.append(el)

        vel = eval_loop(model, val_loader, criterion)
        print(f"Validation loss for epoch {e}: {vel}")
        val_loss.append(vel)

    return model, train_loss, val_loss

In [None]:
# Build the first model together with its loss function and optimizer
input_size = 10   # Channels of the input image
output_size = 20  # Number of classes predicted per pixel

model = UNet(n_channels=input_size, n_classes=output_size)
model = model.to(device=device)

lr = 0.0001  # The learning rate

# Loss averaged over all pixels of a batch
criterion = nn.CrossEntropyLoss(reduction='mean')
# The optimizer uses the gradients computed by backward() to update the model weights
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

In [None]:
# Train the model with cross-entropy loss and report the wall-clock time
epochs = 20
grad_accumulation = 4  # Accumulate gradients over 4 batches per optimizer step

start_time = time.time()
model, train_loss, val_loss = train_loop(
    model,
    train_dataloader,
    val_dataloader,
    optimizer,
    criterion,
    epochs=epochs,
    grad_accumulation=grad_accumulation,
)
print(f"Trained for {epochs} epochs in {time.time()-start_time} seconds")

In [None]:
#Validate the model performance on the validation dataset and plot the training curve
#Visualize a convolutional kernel from the first convolutional layer.


Task 2

In [None]:

# Loss functions (30 points)

# Dataset: Same as task 1

# Treat it as a regression task, where the model predicts a single value. 
# Train the model for 20 epochs with L2 loss. 
# Note: The model will require modifications since you want a single value output instead of probability 
# of each class. From the regressed value, use round(y_pred) to treat it as the class label. 
# For example, if the output is 14.2, treat it like the model predicted class 14 using round(14.2) method.

# Validate the model and plot the training curves. Comment on your observations. 
# Visualize a convolutional kernel from the first convolutional layer.

# Choose the better model of the two and evaluate on the test set


Task 3

In [None]:
#  Learning rate (40 points)

# Summary: The importance of a correct learning rate

# Dataset: Same as task 1
# Train the model for 50 epochs with a learning rate of 10.
# Train the model for 50 epochs with a learning rate of 1e-3.
# Train the model for 50 epochs with a learning rate of 1e-10.
# Validate each model and plot each of the training curves. Comment on your observations. 
