In [1]:
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split

In [2]:
# Load the class palette and the metadata table from the Kaggle input directory.
class_dict = pd.read_csv('/kaggle/input/deepglobe-land-cover-classification-dataset/class_dict.csv')
metadata = pd.read_csv('/kaggle/input/deepglobe-land-cover-classification-dataset/metadata.csv')

# Map every palette colour (r, g, b) to the positional index of its row in
# class_dict; that row position is the integer class label used for training.
color2class = {
    rgb: class_idx
    for class_idx, rgb in enumerate(
        class_dict[['r', 'g', 'b']].itertuples(index=False, name=None)
    )
}


In [3]:
class LandCoverDataset(Dataset):
    """Semantic-segmentation dataset yielding ``(image, mask)`` pairs.

    Each row of ``df`` must provide ``sat_image_path`` and ``mask_path``; both
    are joined onto ``root`` to locate the files on disk.

    Args:
        df: Frame (or anything supporting ``len`` and ``.iloc``) with one row
            per sample.
        img_dir: Stored for backward compatibility; not used to build paths.
        mask_dir: Stored for backward compatibility; not used to build paths.
        transform: Optional callable applied to the RGB PIL image only.
            NOTE(review): random geometric augmentations placed here are not
            applied to the mask and would desynchronise the pair.
        root: Directory the metadata paths are relative to. Defaults to
            ``DEFAULT_ROOT``.
        color_map: Optional ``{(r, g, b): class_index}`` palette. When omitted
            the notebook-level ``color2class`` dict is looked up at call time.

    Returns (from ``__getitem__``):
        ``(image, mask)`` where ``image`` is whatever ``transform`` produced
        (or the PIL image when no transform is given) and ``mask`` is a
        ``torch.long`` tensor of class indices with the image's height/width.
    """

    # Root the metadata paths are joined onto when no explicit ``root`` is given.
    DEFAULT_ROOT = '/kaggle/input/deepglobe-land-cover-classification-dataset'

    def __init__(self, df, img_dir, mask_dir, transform=None, root=None, color_map=None):
        self.df = df
        self.img_dir = img_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.root = self.DEFAULT_ROOT if root is None else root
        self.color_map = color_map

    def __len__(self):
        """Number of samples, i.e. rows in ``df``."""
        return len(self.df)

    def _palette(self):
        """Return the active colour->class mapping (explicit one wins over the global)."""
        return color2class if self.color_map is None else self.color_map

    @staticmethod
    def _spatial_size(image):
        """Return ``(width, height)`` of a ``(..., H, W)`` tensor or a PIL image."""
        if torch.is_tensor(image):
            height, width = image.shape[-2:]
            return int(width), int(height)
        # Anything else is assumed to be a PIL image, whose .size is (W, H).
        width, height = image.size
        return int(width), int(height)

    def rgb_to_mask(self, mask):
        """Convert an ``(H, W, 3)`` RGB mask into an ``(H, W)`` uint8 class-index map.

        When every palette component is 0 or 255, the mask channels are first
        binarised at 128 so that slightly-off channel values still match their
        class colour (the DeepGlobe dataset notes recommend this threshold).
        Pixels matching no palette colour keep the initial value 0.
        """
        mask = np.array(mask)
        palette = self._palette()

        if all(channel in (0, 255) for rgb in palette for channel in rgb):
            mask = np.where(mask >= 128, 255, 0)

        mask_idx = np.zeros(mask.shape[:2], dtype=np.uint8)
        for rgb, idx in palette.items():
            matches = np.all(mask == np.asarray(rgb), axis=-1)
            mask_idx[matches] = idx
        return mask_idx

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = os.path.join(self.root, row['sat_image_path'])
        mask_path = os.path.join(self.root, row['mask_path'])

        # Context managers release the file handles deterministically.
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')
        with Image.open(mask_path) as mask_file:
            mask = mask_file.convert('RGB')

        # Apply image transform
        if self.transform:
            image = self.transform(image)

        # Resize the mask to whatever size the image ended up with instead of a
        # hard-coded value, so the pair stays aligned if the transform changes.
        # NEAREST keeps the palette colours intact (no blended colours).
        target_size = self._spatial_size(image)
        if mask.size != target_size:
            mask = mask.resize(target_size, resample=Image.NEAREST)

        # Convert mask to class indices
        mask = torch.from_numpy(self.rgb_to_mask(mask)).long()

        return image, mask


In [4]:
# Image-side preprocessing: resize to 256x256, then convert to a float tensor.
transform = transforms.Compose([
    transforms.Resize((256,256)),
    transforms.ToTensor(),
])

# Split metadata
train_df = metadata[metadata['split']=='train']
valid_df = metadata[metadata['split']=='valid']

# The dataset opens row['mask_path'] for every sample, so rows without a mask
# path (NaN) would crash at load time. Keep labelled rows only.
train_df = train_df[train_df['mask_path'].notna()]
valid_df = valid_df[valid_df['mask_path'].notna()]

# NOTE(review): the public DeepGlobe release reportedly ships the 'valid' split
# without masks - confirm. If no labelled validation rows remain, hold out 10%
# of the labelled training rows instead (seeded so the split is reproducible).
if valid_df.empty and len(train_df) > 1:
    train_df, valid_df = train_test_split(train_df, test_size=0.1, random_state=42)

# img_dir / mask_dir are passed for interface compatibility; LandCoverDataset
# resolves files from the 'sat_image_path' / 'mask_path' metadata columns.
train_dataset = LandCoverDataset(train_df, img_dir='/kaggle/input/deepglobe-land-cover-classification-dataset/train', 
                                 mask_dir='/kaggle/input/deepglobe-land-cover-classification-dataset/train', 
                                 transform=transform)
valid_dataset = LandCoverDataset(valid_df, img_dir='/kaggle/input/deepglobe-land-cover-classification-dataset/train', 
                                 mask_dir='/kaggle/input/deepglobe-land-cover-classification-dataset/train', 
                                 transform=transform)

# Shuffle only the training samples; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=8)


In [5]:
class DoubleConv(nn.Module):
    """Two consecutive (3x3 conv -> batch norm -> ReLU) stages.

    The convolutions use ``padding=1``, so height and width are unchanged;
    only the channel count goes from ``in_ch`` to ``out_ch``.

    Args:
        in_ch: Channels of the incoming feature map.
        out_ch: Channels produced by both stages.
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()
        stages = []
        # First stage maps in_ch -> out_ch, second stage keeps out_ch.
        for stage_in in (in_ch, out_ch):
            stages.extend([
                nn.Conv2d(stage_in, out_ch, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True),
            ])
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through both stages and return the result."""
        features = self.conv(x)
        return features

class UNet(nn.Module):
    """U-Net style encoder/decoder producing per-pixel class logits.

    The encoder has four ``DoubleConv`` stages (64, 128, 256, 512 channels)
    with 2x max-pooling between them, so the deepest features sit at 1/8 of
    the input resolution. The decoder upsamples three times, each time
    concatenating the encoder features of the matching resolution, and a
    final 1x1 convolution maps 64 channels to ``n_classes`` logits.

    Args:
        n_classes: Number of output channels (one logit map per class).

    Input / output:
        ``forward`` takes a ``(N, 3, H, W)`` tensor and returns
        ``(N, n_classes, H, W)`` raw logits (no softmax applied).
    """

    def __init__(self, n_classes):
        super().__init__()
        self.dconv_down1 = DoubleConv(3,64)
        self.dconv_down2 = DoubleConv(64,128)
        self.dconv_down3 = DoubleConv(128,256)
        self.dconv_down4 = DoubleConv(256,512)

        self.maxpool = nn.MaxPool2d(2)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Decoder inputs are (skip channels + upsampled channels).
        self.dconv_up3 = DoubleConv(256+512,256)
        self.dconv_up2 = DoubleConv(128+256,128)
        self.dconv_up1 = DoubleConv(128+64,64)
        self.conv_last = nn.Conv2d(64, n_classes, 1)

    def _up_and_merge(self, x, skip):
        """Upsample ``x`` 2x and concatenate it with ``skip`` along channels.

        MaxPool2d floors odd sizes, so for inputs whose height/width is not a
        multiple of 8 the 2x-upsampled map can be smaller than the skip
        tensor. In that case it is resized to the skip's spatial size so the
        concatenation does not fail; matching sizes take the original path.
        """
        x = self.upsample(x)
        if x.shape[-2:] != skip.shape[-2:]:
            x = F.interpolate(x, size=skip.shape[-2:], mode='bilinear', align_corners=True)
        return torch.cat([x, skip], dim=1)

    def forward(self, x):
        # Encoder
        conv1 = self.dconv_down1(x)
        conv2 = self.dconv_down2(self.maxpool(conv1))
        conv3 = self.dconv_down3(self.maxpool(conv2))
        conv4 = self.dconv_down4(self.maxpool(conv3))

        # Decoder
        x = self.dconv_up3(self._up_and_merge(conv4, conv3))
        x = self.dconv_up2(self._up_and_merge(x, conv2))
        x = self.dconv_up1(self._up_and_merge(x, conv1))

        out = self.conv_last(x)
        return out


In [6]:
# Use the GPU when PyTorch can see one, otherwise run on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# One output channel per row of class_dict.
model = UNet(n_classes=len(class_dict))
model = model.to(device)

# Per-pixel cross-entropy on the raw logits, optimised with Adam (lr = 1e-3).
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)


In [7]:
# Number of full passes over train_loader.
num_epochs = 5

for epoch in range(num_epochs):
    # Switch to training mode before each epoch.
    model.train()

    # Running sum of the per-batch losses for this epoch.
    train_loss = 0
    for imgs, masks in train_loader:
        # Move the batch to the same device as the model.
        imgs = imgs.to(device)
        masks = masks.to(device)

        # Standard step: clear old gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        # .item() extracts a plain Python float from the loss tensor.
        train_loss = train_loss + loss.item()

    # Report the mean of the per-batch losses (epochs are shown 1-based).
    print(f"Epoch {epoch+1}, Train Loss: {train_loss/len(train_loader):.4f}")


KeyboardInterrupt: 

In [None]:
# Preview the first five image paths of the training rows.
train_df['sat_image_path'].head(5)

