In [None]:
#@title Install service functions for the notebook

!pip install "git+https://github.com/broutonlab/deep-learning-course.git"
import dl_course

In [None]:
#@title Install dependencies

!pip install pytorch-lightning
!pip install ipyplot

In [None]:
#@title Download dataset
import ipyplot
import numpy as np
import pandas as pd

from dl_course.cnns.utils import *

# Folder name handed to download_dataset; SatelliteDataset later joins
# image paths onto it.
DATASET_PATH = "satellite_dataset"

# NOTE(review): download_dataset is not defined in this notebook -- presumably it
# comes from the `dl_course.cnns.utils` star import, and the second argument looks
# like a remote file id. Confirm against the dl_course package.
download_dataset(DATASET_PATH, "1LOO2U1xSnGByYHzlt7siCBn1G3Jaq_Gs")

In [None]:
import torch

# [!] If CUDA is reported as unavailable,
# enable a GPU runtime in the notebook settings
has_cuda = torch.cuda.is_available()
print(f'Cuda available: {has_cuda}')

# pick the GPU when there is one, otherwise stay on the CPU
device = torch.device('cuda') if has_cuda else torch.device('cpu')
print('Using device:', device)

# Meet the RSI-CB256 dataset

For this notebook we'll be using a portion of a satellite image classification dataset (https://www.kaggle.com/datasets/sohelranaccselab/satellite-data).

Your task is to implement a classifier that recognizes several types of scenery from satellite imagery.

In [None]:
# NOTE(review): preview_dataset is not defined in this notebook -- presumably it comes
# from the `dl_course.cnns.utils` star import and displays `num_images` samples; confirm.
preview_dataset(DATASET_PATH, num_images=5)

# Dataset initialization

Our dataset contains three files, already split for you into:
- train.csv (70% of the whole data)
- val.csv (15%)
- test.csv (15%)

In [None]:
import os

import pandas as pd
import pytorch_lightning as pl
import torch.nn as nn
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image


# we'll have separate instances of this class for train, val, test
class SatelliteDataset(Dataset):
  """Map-style dataset yielding one satellite image and its one-hot label.

  Every row of `data` describes one sample: the first column holds the image
  path relative to `DATASET_PATH`, the second column holds the class name.

  Args:
    data: table with one row per image (path in column 0, label in column 1).
    labels: ordered list of class names; its order defines the one-hot layout.
  """

  # every image is brought to this (height, width) before it is returned
  IMAGE_SIZE = (256, 256)

  def __init__(self, data: pd.DataFrame, labels: list):
    super().__init__()
    self.data = data
    self.resize_transform = T.Resize(size = self.IMAGE_SIZE)
    self.labels = labels

  def __len__(self):
    """Return the number of samples, i.e. the number of rows in `data`."""
    return len(self.data)

  def _encode_label(self, label):
    """Turn a class name into a float one-hot vector ordered like `self.labels`."""
    onehot = [1 if known == label else 0 for known in self.labels]
    return torch.tensor(onehot).float()

  def _fit_to_image_size(self, X):
    """Resize `X` unless its height and width already match `IMAGE_SIZE`.

    read_image returns a (channels, height, width) tensor, so only the last
    two dimensions are compared. Comparing the full size against a 2-tuple
    would never match and every image would be resized needlessly.
    """
    if tuple(X.shape[-2:]) != self.IMAGE_SIZE:
      X = self.resize_transform(X)
    return X

  def __getitem__(self, idx):
    """Load sample `idx`.

    Returns:
      dict with "X", the float image scaled to [0, 1], and "y", the one-hot
      label as a float tensor.
    """
    row = self.data.iloc[idx]
    # .iloc keeps the access positional; plain row[0] / row[1] on a
    # string-labelled Series is deprecated in pandas
    image_path, label = row.iloc[0], row.iloc[1]

    X = read_image(os.path.join(DATASET_PATH, image_path))
    # we are resizing all images, although you can try implementing
    # an architecture that is able to handle images of different sizes
    X = self._fit_to_image_size(X)

    # convert uint8 ([0, 255]) to float ([0, 1])
    X = X.float() / 255

    return {
        "X": X,
        "y": self._encode_label(label)
    }



# this is just an initializer of train, val and test datasets
class SatelliteDataModule(pl.LightningDataModule):
  """Builds the train / validation / test dataloaders from the split CSV files.

  NOTE(review): `SatelliteDataset` resolves image paths against the
  notebook-level `DATASET_PATH`, not against `data_folder`; only the CSV
  files are looked up in `data_folder`.

  Args:
    data_folder: folder containing `train.csv`, `val.csv` and `test.csv`.
    labels: ordered list of class names, forwarded to every `SatelliteDataset`.
    batch_size: number of samples per batch, shared by all three loaders.
    num_workers: worker processes per loader; `None` means one per CPU
      reported by `os.cpu_count()`.
  """

  def __init__(self, data_folder: str, labels: list,
               batch_size: int, num_workers: int = None):
    super().__init__()
    self.data_folder = data_folder
    # keep the labels on the instance so the loaders do not depend on a
    # notebook-level `labels` variable that may not be defined yet
    self.labels = labels
    self.batch_size = batch_size
    if num_workers is None:
      # os.cpu_count() may return None; fall back to loading in the main process
      num_workers = os.cpu_count() or 0
    self.num_workers = num_workers

  def _load_dataframe(self, filename: str):
    """Read the CSV file `filename` from the data folder into a dataframe."""
    csvfile = os.path.join(self.data_folder, filename)
    return pd.read_csv(csvfile)

  def _make_dataloader(self, filename: str, shuffle: bool = False):
    """Create a loader over the split described by `filename`."""
    return DataLoader(
        dataset=SatelliteDataset(self._load_dataframe(filename), self.labels),
        batch_size=self.batch_size,
        num_workers=self.num_workers,
        shuffle=shuffle)

  def train_dataloader(self):
    """Loader over `train.csv`; the only split that is shuffled."""
    return self._make_dataloader("train.csv", shuffle=True)

  def val_dataloader(self):
    """Loader over `val.csv`, in file order."""
    return self._make_dataloader("val.csv")

  def test_dataloader(self):
    """Loader over `test.csv`, in file order."""
    return self._make_dataloader("test.csv")

# Implement your model

Let's jump right into the code and implement a simple architecture.

In [None]:
import torch.nn as nn
import torchvision.transforms as T

# Seed PyTorch's random number generator before the model is built below,
# so that repeated runs start from the same random state.
torch.manual_seed(1)

class SatelliteCNN(pl.LightningModule):
  """Small CNN classifier for 3x256x256 satellite images (exercise template).

  The `...` placeholders marked with `[!] TODO` are intentionally left for
  you to fill in; the class cannot be instantiated before that.

  Args:
    num_classes: number of scene classes the network has to distinguish.
    learning_rate: step size for the optimizer built in `configure_optimizers`.
  """

  def __init__(self, num_classes=None, learning_rate=None):
    super().__init__()
    self.learning_rate = learning_rate
    self.num_classes = num_classes
    # per-batch results of the running test epoch, read by on_test_epoch_end
    self._test_outputs = []
    self.cnn = nn.Sequential(
        # ===============
        # [!] TODO:
        #   The first layer of our network
        #   will be a convolutional layer.
        #   It takes original image (3 channels, 256x256) as input
        #   and should output two 32x32 feature maps
        #   (you'll see if it's an appropriate choice later).
        #
        #   You need to figure the parameters of the layer
        #   so that output shapes match. (we chose kernel_size=16 for you)
        #   You may use this calculator:
        #   https://madebyollin.github.io/convnet-calculator/
        #   Or the docs:
        #   https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
        nn.Conv2d(in_channels=...,
                  out_channels=...,
                  kernel_size=16,
                  stride=...,
                  padding=4),
        # ===============
        nn.ReLU(),
        nn.BatchNorm2d(2),
        nn.Flatten(start_dim=1),
        nn.Linear(2*32*32, 8*8),
        nn.ReLU(),
        # ===============
        # [!] TODO:
        #   Implement final affine layer
        #   (replace the `...` below and keep the trailing comma)
        ...,
        # ===============
        nn.Softmax(-1)
    )

  def forward(self, x):
    # ===============
    # [!] TODO: implement forward pass for our model
    return ...
    # ===============

  def configure_optimizers(self):
    # ===============
    # [!] TODO: configure optimizer.
    # Let's use Adam. Don't forget to use self.learning_rate
    # https://pytorch.org/docs/stable/optim.html
    return ...
    # ===============

  def loss_fn(self, y_hat, target):
    """Cross-entropy between predicted probabilities and one-hot targets.

    `y_hat` already went through the final `nn.Softmax`, so it holds
    probabilities. `nn.CrossEntropyLoss` expects raw logits and would apply
    softmax a second time, therefore the log is taken here directly.
    """
    # clamp avoids log(0) for classes that received zero probability
    log_probs = torch.log(y_hat.clamp_min(1e-8))
    return -(target * log_probs).sum(dim=-1).mean()

  def _eval_step(self, x, y):
    """Run the model on a batch and return `(predictions, loss)`."""
    img = x.view(-1, 3, 256, 256)
    y_hat = self.forward(img)
    loss = self.loss_fn(y_hat, y)
    return y_hat, loss

  def training_step(self, batch, batch_idx):
    """Compute and log the training loss for one batch."""
    x, y = batch["X"], batch["y"]
    y_hat, loss = self._eval_step(x, y)
    self.log('train_loss', loss)
    return loss

  def validation_step(self, batch, batch_idx):
    """Compute and log the validation loss for one batch."""
    x, y = batch["X"], batch["y"]
    y_hat, loss = self._eval_step(x, y)
    self.log('val_loss', loss)

  def on_test_epoch_start(self):
    """Drop results left over from an earlier (possibly interrupted) test run."""
    self._test_outputs.clear()

  def test_step(self, batch, batch_idx):
    """Compute loss and number of correct predictions for one test batch."""
    x, y = batch["X"], batch["y"]
    y_hat, loss = self._eval_step(x, y)

    # index of the largest entry = predicted / true class
    _, preds = torch.max(y_hat, 1)
    _, y_labels = torch.max(y, 1)
    correct = torch.sum(preds == y_labels)

    output = {'test_loss': loss, 'correct': correct, 'num_entries': x.shape[0]}
    # pytorch-lightning 2.0 no longer collects step outputs for the module,
    # so they are kept on the instance until the epoch ends
    self._test_outputs.append(output)
    return output

  def on_test_epoch_end(self):
    """Aggregate the stored batch results into `test_loss` and `test_acc`.

    Replaces the `test_epoch_end(outputs)` hook, which was removed in
    pytorch-lightning 2.0; this hook exists in both 1.x and 2.x.
    """
    outputs = self._test_outputs
    if not outputs:
      return

    avg_loss = torch.stack([out['test_loss'] for out in outputs]).mean()

    total_correct = torch.stack([out['correct'].float() for out in outputs]).sum()
    avg_acc = total_correct / sum(out['num_entries'] for out in outputs)

    self.log_dict({'test_loss': avg_loss, 'test_acc': avg_acc})
    self._test_outputs.clear()

# if you get "out of memory" errors,
# reduce the batch size
batch_size = 32

# class names; their order defines the layout of the one-hot targets
labels = ["bridge", "coastline", "dam", "highway", "parkinglot", "river"]

# data: the split CSVs live in the folder the dataset was downloaded to,
# so reuse DATASET_PATH instead of repeating the folder name
datamodule = SatelliteDataModule(DATASET_PATH, labels, batch_size=batch_size)

# model
model = SatelliteCNN(num_classes=len(labels), learning_rate=1e-3)

# Sanity check

It is reasonable to start training with a small number of batches to check that every line of our code works.

In [None]:
# Check if everything runs OK: fast_dev_run=3 limits the run to 3 batches.
# Fall back to the CPU when CUDA is unavailable instead of failing on
# a hard-coded accelerator='gpu'.
use_gpu = torch.cuda.is_available()
trainer = pl.Trainer(accelerator='gpu' if use_gpu else 'cpu',
                     devices=-1 if use_gpu else 1,
                     fast_dev_run=3)
trainer.fit(model=model, datamodule=datamodule)

# Train the model

This may take a while...

In [None]:
# metrics are written below tb_logs/satellite_cnn for TensorBoard
logger = pl.loggers.TensorBoardLogger("tb_logs", name="satellite_cnn")

# use every visible GPU when CUDA is available, otherwise train on the CPU
# instead of failing on a hard-coded accelerator='gpu'
use_gpu = torch.cuda.is_available()
trainer = pl.Trainer(accelerator='gpu' if use_gpu else 'cpu',
                     devices=-1 if use_gpu else 1,
                     max_epochs=15, log_every_n_steps=20, logger=logger)
trainer.fit(model=model, datamodule=datamodule)

# Test model performance

In [None]:
# Run the test loop over the datamodule's test dataloader (test.csv);
# the model logs 'test_loss' and 'test_acc' for it.
trainer.test(model=model, datamodule=datamodule)

# Visualize the model

In [None]:
# NOTE(review): visualize_model is not defined in this notebook -- presumably it comes
# from the `dl_course.cnns.utils` star import and shows predictions for `num_images`
# samples; confirm against the dl_course package.
visualize_model(model, datamodule, labels, num_images=6)

# Tensorboard

This is a dashboard that allows you to monitor different metrics during training and across multiple runs.
(it won't load instantly though)

'train_loss' and 'val_loss' time series are our primary subjects of interest.

In [None]:
# Load the TensorBoard notebook extension and open the dashboard on tb_logs/,
# the directory the TensorBoardLogger of the training cell writes to.
%load_ext tensorboard
%tensorboard --logdir tb_logs/

# Evaluate the results

If you've done everything right, you should see about 58% (or at least >45%) accuracy on the test dataset.

This performance is not satisfactory.

# Home task: improve your model

We've been able to easily achieve >90% accuracy on the test dataset, and you should too!

We'll provide some tips on how to do it below.

## Increase number of epochs

Number of epochs needed to train your model may vary with model parameters and augmentations.

Keep an eye on validation loss. If it stops decreasing long before the final epoch or starts to increase, you probably don't need that many epochs.

## Use different convolutions

Our convolution outputs just 2 feature maps.  
It is common for CNNs to have a lot more filters (64, 96, 256, etc.)  
CNNs are often comprised of 3 or more convolutional layers, you should add more layers too.  
Feel free to explore different CNN architectures on the web.

## Use max pooling

Max pooling is often used to reduce dimensionality of data.  
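It is reasonable to apply max pooling after a convolution and before the activation function: the result is the same, but the activation then runs on a smaller tensor, because

$\mathrm{MaxPool}(\mathrm{ReLU}(X)) = \mathrm{ReLU}(\mathrm{MaxPool}(X))$  
(think about which other activation functions also satisfy this equation)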

Max pooling, similar to convolution, won't let you define its output dimensions explicitly. You may want to use formulas provided in the docs:  
https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html


## Use data augmentation

Is a mirrored image of a river also an image of a river? Of course it is.  
Using this logic, we can generate new datapoints using existing ones by cropping/scaling/flipping our images. 

One of the simpler ways to do it in our code would look like this:
```python
import torchvision.transforms as T
# ...
class SatelliteDataset(Dataset):
  def __init__(self, data: pd.DataFrame, labels: list, transform=None):
    super().__init__()
    self.transform = transform
    # ...

  #...
  def __getitem__(self, idx):
    #...
    if self.transform:
      X = self.transform(X)
    #...

#...
class SatelliteDataModule(pl.LightningDataModule):
  #...
  def train_dataloader(self):
    return DataLoader(
        dataset=SatelliteDataset(
            self._load_dataframe("train.csv"), labels, 
            # you may want to wrap it in our own callable class
            transform=T.Compose([
              T.RandomVerticalFlip(p=0.5),
              T.RandomHorizontalFlip(p=0.5)
            ])),
        batch_size=self.batch_size,
        num_workers=self.num_workers,
        shuffle=True)

```

For different types of transforms, refer here:  
https://pytorch.org/vision/stable/transforms.html