## Starter Notebook For The Côte d’Ivoire Byte-Sized Agriculture Challenge

This starter notebook achieves the following:


1.   Load & display satellite images
2.   Build a basic machine learning model
3.   Prepare the predictions for submission.

The machine learning model shown here is by no mean ideal. Winning models should leverage multi-temporal data and not single-date imagery.

# Processing  the GTIFF files

Here's a complete list of **Sentinel-2 bands** along with their **descriptions**, **wavelengths**, and **spatial resolutions**:

---

### 🛰️ **Sentinel-2 Band Overview**

| Band | Name                    | Wavelength (nm) | Resolution (m) | Description |
|------|-------------------------|-----------------|----------------|-------------|
| B1   | Coastal aerosol         | 443             | 60             | Useful for atmospheric correction and aerosol detection. |
| B2   | Blue                    | 490             | 10             | Penetrates water; used for vegetation, water bodies, and coastal mapping. |
| B3   | Green                   | 560             | 10             | Good for assessing plant health and measuring biomass. |
| B4   | Red                     | 665             | 10             | Key band for vegetation discrimination and chlorophyll absorption. |
| B5   | Red Edge 1              | 705             | 20             | Useful for detecting subtle changes in vegetation. |
| B6   | Red Edge 2              | 740             | 20             | Further detail on vegetation, especially stress and growth stages. |
| B7   | Red Edge 3              | 783             | 20             | Extended vegetation monitoring. |
| B8   | NIR (Near Infrared)     | 842             | 10             | Excellent for vegetation vigor, biomass, and land-water contrast. |
| B8A  | Narrow NIR (Red Edge 4) | 865             | 20             | Sensitive to chlorophyll content and canopy structure. |
| B9   | Water Vapor             | 945             | 60             | Atmospheric correction (water vapor absorption). |
| B10  | SWIR – Cirrus           | 1375            | 60             | Detects high-altitude clouds (cirrus); not used for land analysis. |
| B11  | SWIR 1                  | 1610            | 20             | Differentiates clouds, snow, and vegetation moisture. |
| B12  | SWIR 2                  | 2190            | 20             | Useful for geology, soil, and burnt area mapping. |

---

### ✅ Band Groupings by Application

#### **RGB (Natural Color)**
- **B4 (Red)**
- **B3 (Green)**
- **B2 (Blue)**

#### **Vegetation Indices**
- **NDVI:** B8 (NIR) & B4 (Red)
- **EVI:** B8 (NIR), B4 (Red), B2 (Blue)
- **SAVI:** B8 (NIR) & B4 (Red)
- **NDRE:** B8A (Narrow NIR) & B5 (Red Edge)

#### **Cloud/Atmosphere Analysis**
- B1, B9, B10

#### **Snow/Ice Monitoring**
- B3, B11, B12

---


In [1]:
%%capture
!pip install rasterio -q

In [2]:
import rasterio
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
import cv2
import numpy as np
import os
from torchvision import transforms
from pathlib import Path
import torch.nn.functional as F
import os

In [1]:
class BasicCnn(nn.Module):

    def __init__(self, inputImgChans, numClass, convKernelSize=3, inputImgSize=236):
        super().__init__()

        self.cnnStack = nn.Sequential(
            nn.Conv2d(in_channels=inputImgChans,out_channels=16,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Conv2d(in_channels=16,out_channels=16,kernel_size=5,stride=1,padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride=2),

            nn.Conv2d(in_channels=16,out_channels=32,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(in_channels=32,out_channels=32,kernel_size=5,stride=1,padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride=2),

            nn.Conv2d(in_channels=32,out_channels=64,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(in_channels=64,out_channels=64,kernel_size=5,stride=1,padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride=2),

            nn.Flatten(),
            nn.Linear(in_features=2304,out_features=512),
            nn.ReLU(),
            nn.Linear(in_features=512,out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128,out_features=numClass)
        )


    def forward(self,inputs):
        outputs = self.cnnStack(inputs)
        return outputs

NameError: name 'nn' is not defined

In [None]:
class CustomImageDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.data_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        img_path = os.path.join(self.root_dir, self.data_frame.iloc[idx]['tifPath'])
        label = self.data_frame.iloc[idx]['class']

        try:
            with rasterio.open(img_path) as src:
                raster = src.read()  # (bands, height, width)
            raster = torch.tensor(raster, dtype=torch.float32)
            raster = raster[:,:48,:48]

            label = torch.tensor(label, dtype=torch.long) - 1
            return raster, label

        except Exception as e:
            print(f"Error loading image {img_path}: {str(e)}")
            return None, None

In [None]:
class SkipConCnn(nn.Module):

    def __init__(self, inputImgChans, numClass, convKernelSize=3, inputImgSize=236):
        super().__init__()

        self.layer1 = nn.ModuleList([
            nn.Conv2d(in_channels=inputImgChans,out_channels=16,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Conv2d(in_channels=16,out_channels=16,kernel_size=5,stride=1,padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride=2)
        ])

        self.skip1 = nn.ModuleList([
            nn.Conv2d(in_channels=inputImgChans,out_channels=16,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride=2)
        ])

        self.layer2 = nn.ModuleList([
            nn.Conv2d(in_channels=16,out_channels=32,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(in_channels=32,out_channels=32,kernel_size=5,stride=1,padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride=2)
        ])

        self.skip2 = nn.ModuleList([
            nn.Conv2d(in_channels=16,out_channels=32,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride=2)
        ])

        self.layer3 = nn.ModuleList([
            nn.Conv2d(in_channels=32,out_channels=64,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(in_channels=64,out_channels=64,kernel_size=5,stride=1,padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride=2)
        ])

        self.skip3 = nn.ModuleList([
            nn.Conv2d(in_channels=32,out_channels=64,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride=2)
        ])

        self.linLayer = nn.ModuleList([
            nn.Flatten(),
            nn.Linear(in_features=2304,out_features=512),
            nn.ReLU(),
            nn.Linear(in_features=512,out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128,out_features=numClass)
        ])




    def forward(self,inputs):
        x = inputs
        for layer in self.layer1:
            x = layer(x)
        y = inputs
        for layer in self.skip1:
            y = layer(y)
        inputs = x + y


        x = inputs
        for layer in self.layer2:
            x = layer(x)
        y = inputs
        for layer in self.skip2:
            y = layer(y)
        inputs = x+y


        x = inputs
        for layer in self.layer3:
            x = layer(x)
        y = inputs
        for layer in self.skip3:
            y = layer(y)
        outputs = x+y

        for layer in self.linLayer:
            outputs = layer(outputs)

        return outputs

In [None]:
class InceptionModule(nn.Module):
    def __init__(self, in_channels, out_channels_1x1, out_channels_3x3_reduce, out_channels_3x3,
                 out_channels_5x5_reduce, out_channels_5x5, out_channels_pool):
        super(InceptionModule, self).__init__()

        self.layer1x1 = nn.ModuleList([
            nn.Conv2d(in_channels=in_channels,out_channels=out_channels_1x1,kernel_size=1,stride=1,padding=0, bias=False),
            nn.BatchNorm2d(out_channels_1x1),
            nn.ReLU()
        ])
        self.layer3x3 = nn.ModuleList([
            nn.Conv2d(in_channels=in_channels,out_channels=out_channels_3x3_reduce,kernel_size=1,stride=1,padding=0,bias=False),
            nn.BatchNorm2d(out_channels_3x3_reduce),
            nn.ReLU(),
            nn.Conv2d(in_channels=out_channels_3x3_reduce,out_channels=out_channels_3x3,kernel_size=3,stride=1,padding=1,bias=False),
            nn.BatchNorm2d(out_channels_3x3),
            nn.ReLU()
        ])
        self.layer5x5 = nn.ModuleList([
            nn.Conv2d(in_channels=in_channels,out_channels=out_channels_5x5_reduce,kernel_size=1,stride=1,padding=0,bias=False),
            nn.BatchNorm2d(out_channels_5x5_reduce),
            nn.ReLU(),
            nn.Conv2d(in_channels=out_channels_5x5_reduce,out_channels=out_channels_5x5,kernel_size=5,stride=1,padding=2,bias=False),
            nn.BatchNorm2d(out_channels_5x5),
            nn.ReLU()
        ])
        self.layerPool = nn.ModuleList([
            nn.MaxPool2d(kernel_size=3,stride=1,padding=1),
            nn.Conv2d(in_channels=in_channels,out_channels=out_channels_pool,kernel_size=5,stride=1,padding=2,bias=False),
            nn.BatchNorm2d(out_channels_pool),
            nn.ReLU()
        ])

    def forward(self,x):
        x1 = x
        for layer in self.layer1x1:
            x1 = layer(x1)
        x3 = x
        for layer in self.layer3x3:
            x3 = layer(x3)
        x5 = x
        for layer in self.layer5x5:
            x5 = layer(x5)
        xp = x
        for layer in self.layerPool:
            xp = layer(xp)

        output = torch.cat([x1, x3, x5, xp],dim=1)

        return output

class CnnSkipConInception(nn.Module):

    def __init__(self,inputImgChans,numClass,convKernelSize=3,inputImgSize=236):
        super().__init__()

        self.layer1 = nn.ModuleList([
            InceptionModule(inputImgChans, 16,16,16,16,16,16),
            nn.MaxPool2d(kernel_size=2,stride=2)
        ])

        self.skip1 = nn.ModuleList([
            nn.Conv2d(in_channels=inputImgChans,out_channels=64,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride=2)
        ])

        self.layer2 = nn.ModuleList([
            InceptionModule(64, 32,32,32,32,32,32),
            nn.MaxPool2d(kernel_size=2,stride=2)
        ])

        self.skip2 = nn.ModuleList([
            nn.Conv2d(in_channels=64,out_channels=128,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride=2)
        ])

        self.layer3 = nn.ModuleList([
            InceptionModule(128, 64,64,64,64,64,64),
            nn.MaxPool2d(kernel_size=2,stride=2)
        ])

        self.skip3 = nn.ModuleList([
            nn.Conv2d(in_channels=128,out_channels=256,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,stride=2)
        ])

        self.linLayer = nn.ModuleList([
            nn.Flatten(),
            nn.Linear(in_features=9216,out_features=1024),
            nn.ReLU(),
            nn.Linear(in_features=1024,out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128,out_features=numClass)
        ])

    def forward(self,inputs):
        x = inputs
        for layer in self.layer1:
            x = layer(x)
        y = inputs
        for layer in self.skip1:
            y = layer(y)
        inputs = x+y

        x = inputs
        for layer in self.layer2:
            x = layer(x)
        y = inputs
        for layer in self.skip2:
            y = layer(y)
        inputs = x+y

        x = inputs
        for layer in self.layer3:
            x = layer(x)
        y = inputs
        for layer in self.skip3:
            y = layer(y)
        outputs = x+y

        for layer in self.linLayer:
            outputs = layer(outputs)

        return outputs

In [None]:
def create_dataloaders(csv_file, root_dir, batch_size=32, train_split=0.8, transform=None, num_workers=4):
    full_dataset = CustomImageDataset(csv_file=csv_file, root_dir=root_dir, transform=transform)

    train_size = int(train_split * len(full_dataset))
    val_size = len(full_dataset) - train_size

    train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True
    )

    return train_loader, val_loader

In [None]:
def train_and_validate(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10):
    model.to(device)

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for batch_idx, (data, target) in enumerate(train_loader):
            if data is None:
                continue
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        avg_train_loss = train_loss / len(train_loader)

        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for data, target in val_loader:
                if data is None:
                    continue
                data, target = data.to(device), target.to(device)
                output = model(data)
                loss = criterion(output, target)
                val_loss += loss.item()

                _, predicted = torch.max(output.data, 1)
                total += target.size(0)
                correct += (predicted == target).sum().item()

        avg_val_loss = val_loss / len(val_loader)
        val_accuracy = 100 * correct / total

        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'Train Loss: {avg_train_loss:.4f}')
        print(f'Val Loss: {avg_val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%')

In [3]:
if __name__ == '__main__':

    train_loader,val_loader = create_dataloaders(
        csv_file=Path("S2Images/TrainDataset.csv"),
        root_dir=Path(''),
        batch_size=128,
        train_split=0.8,  # 80% train, 20% validation
        transform=None,
        num_workers=4
    )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = CnnSkipConInception(inputImgChans=12, numClass=3, convKernelSize=3, inputImgSize=48).to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.00005)

    train_and_validate(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10)