In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import random
import os
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.metrics import jaccard_score
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

In [34]:
# Set random seeds for reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Check GPU availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cpu


In [None]:
import os, json
os.makedirs(os.path.join(os.path.expanduser("~"), ".kaggle"), exist_ok=True)
with open(os.path.join(os.path.expanduser("~"), ".kaggle", "kaggle.json"), "w") as f:
    json.dump({"username":"username","key":"apikey"}, f)

In [38]:
!mkdir ~/.kaggle 

The syntax of the command is incorrect.


In [40]:
!--chmod 600 ~/.kaggle/kaggle.json

'--chmod' is not recognized as an internal or external command,
operable program or batch file.


In [29]:
!cd ~/.kaggle/ && ls 

The system cannot find the path specified.


In [30]:
import os

os.environ["KAGGLE_USERNAME"] = "pacymugisho"
os.environ["KAGGLE_KEY"] = "a526600c8b9968cc980d6db82fe706f1"


In [31]:
!kaggle datasets list -s "cityscapes dataset" 

ref                                                  title                                           size  lastUpdated                 downloadCount  voteCount  usabilityRating  
---------------------------------------------------  ---------------------------------------  -----------  --------------------------  -------------  ---------  ---------------  
shuvoalok/cityscapes                                 cityscapes dataset                         209001313  2023-09-13 21:03:34.607000           5999         25  0.875            
balraj98/cityscapes-pix2pix-dataset                  Cityscapes Pix2Pix Dataset                 105595335  2020-10-18 08:10:17.600000            767         17  0.9411765        
vikramtiwari/pix2pix-dataset                         pix2pix dataset                           2574957257  2018-07-04 05:54:59.713000           8826        114  0.625            
dansbecker/cityscapes-image-pairs                    Cityscapes Image Pairs                     211492512

In [32]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("shuvoalok/cityscapes")

print("Path to dataset files:", path) 

Path to dataset files: C:\Users\DELL\.cache\kagglehub\datasets\shuvoalok\cityscapes\versions\2


In [22]:
# Define paths
train_images_folder_path = "/kaggle/input/cityscapes/train/img"
train_mask_folder_path = "/kaggle/input/cityscapes/train/label"
test_images_folder_path = "/kaggle/input/cityscapes/val/img"
test_mask_folder_path = "/kaggle/input/cityscapes/val/label" 


In [None]:
# ========== CONSTANTS ==========
names = ['unlabeled', 'dynamic', 'ground', 'road', 'sidewalk', 'parking', 'rail track', 'building', 'wall',
         'fence', 'guard rail', 'bridge', 'tunnel', 'pole', 'traffic light', 'traffic sign', 'vegetation', 
         'terrain', 'sky', 'person', 'rider', 'car', 'truck', 'bus', 'caravan', 'trailer', 'train', 
         'motorcycle', 'bicycle', 'license plate']

colors = np.array([
    (0, 0, 0), (111, 74, 0), (81, 0, 81), (128, 64, 128), (244, 35, 232), 
    (250, 170, 160), (230, 150, 140), (70, 70, 70), (102, 102, 156), 
    (190, 153, 153), (180, 165, 180), (150, 100, 100), (150, 120, 90), 
    (153, 153, 153), (250, 170, 30), (220, 220, 0), (107, 142, 35), 
    (152, 251, 152), (70, 130, 180), (220, 20, 60), (255, 0, 0), 
    (0, 0, 142), (0, 0, 70), (0, 60, 100), (0, 0, 90), (0, 0, 110), 
    (0, 80, 100), (0, 0, 230), (119, 11, 32), (0, 0, 142)
], dtype=np.uint8)

IMG_HEIGHT, IMG_WIDTH = 96, 256
NUM_CLASSES = 30
BATCH_SIZE = 32

In [None]:
# ========== GDN LAYER ==========
class NonNegConstraint:
    def __call__(self, tensor):
        return torch.clamp(tensor, min=1e-15)

class GDN(nn.Module):
    def __init__(self, in_channels, filter_size=3):
        super(GDN, self).__init__()
        self.filter_size = filter_size
        self.padding = (filter_size - 1) // 2
        
        # Parameters
        self.beta = nn.Parameter(torch.ones(in_channels))
        self.alpha = nn.Parameter(torch.ones(in_channels), requires_grad=False)
        self.epsilon = nn.Parameter(torch.ones(in_channels), requires_grad=False)
        
        # Gamma weights (constrained to be non-negative)
        self.gamma = nn.Parameter(torch.zeros(filter_size, filter_size, in_channels, in_channels))
        
        # Apply constraints
        self.constraint = NonNegConstraint()
        
    def forward(self, x):
        # Apply constraints
        with torch.no_grad():
            self.beta.data = self.constraint(self.beta.data)
            self.alpha.data = self.constraint(self.alpha.data)
            self.epsilon.data = self.constraint(self.epsilon.data)
            self.gamma.data = self.constraint(self.gamma.data)
        
        # Compute normalization
        abs_x = torch.abs(x)
        norm_conv = F.conv2d(
            abs_x ** self.alpha.view(1, -1, 1, 1),
            self.gamma.permute(3, 2, 0, 1),  # [out_c, in_c, h, w]
            padding=self.padding,
            groups=self.gamma.shape[3]  # Depthwise convolution
        )
        
        norm = self.beta.view(1, -1, 1, 1) + norm_conv
        norm = norm ** self.epsilon.view(1, -1, 1, 1)
        
        return x / norm

# ========== DATASET ==========
class CityscapesDataset(Dataset):
    def __init__(self, image_folder, mask_folder, image_names, mask_names, 
                 height=IMG_HEIGHT, width=IMG_WIDTH, transform=None, is_train=True):
        self.image_folder = image_folder
        self.mask_folder = mask_folder
        self.image_names = image_names
        self.mask_names = mask_names
        self.height = height
        self.width = width
        self.transform = transform
        self.is_train = is_train
        
    def __len__(self):
        return len(self.image_names)
    
    def __getitem__(self, idx):
        # Load image
        img_path = os.path.join(self.image_folder, self.image_names[idx])
        image = Image.open(img_path).convert('RGB')
        
        # Load mask
        mask_path = os.path.join(self.mask_folder, self.mask_names[idx])
        mask = Image.open(mask_path).convert('RGB')
        
        # Resize
        image = image.resize((self.width, self.height), Image.BILINEAR)
        mask = mask.resize((self.width, self.height), Image.NEAREST)
        
        # Convert to numpy arrays
        image = np.array(image).astype(np.float32) / 255.0
        mask = np.array(mask).astype(np.int32)
        
        # One-hot encode mask
        one_hot_mask = np.zeros((self.height, self.width, NUM_CLASSES), dtype=np.float32)
        for i, color in enumerate(colors):
            class_map = np.all(mask == color, axis=-1)
            one_hot_mask[:, :, i] = class_map
        
        # Convert to tensors
        image = torch.from_numpy(image).permute(2, 0, 1)  # [C, H, W]
        one_hot_mask = torch.from_numpy(one_hot_mask).permute(2, 0, 1)  # [C, H, W]
        
        return image, one_hot_mask

# ========== MODEL ==========
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, pool=True, dropout=0.2):
        super(ConvBlock, self).__init__()
        self.pool = pool
        
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu1 = nn.ReLU(inplace=True)
        self.dropout1 = nn.Dropout2d(dropout)
        
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu2 = nn.ReLU(inplace=True)
        self.dropout2 = nn.Dropout2d(dropout)
        
        if pool:
            self.pool_layer = nn.MaxPool2d(2)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        
        if self.pool:
            return x, self.pool_layer(x)
        return x

class UNetGDN(nn.Module):
    def __init__(self, in_channels=3, num_classes=NUM_CLASSES):
        super(UNetGDN, self).__init__()
        
        # Encoder
        self.gdn = GDN(in_channels)
        self.enc1 = ConvBlock(in_channels, 16, pool=True)
        self.enc2 = ConvBlock(16, 32, pool=True)
        self.enc3 = ConvBlock(32, 64, pool=True)
        self.enc4 = ConvBlock(64, 128, pool=True)
        
        # Bridge
        self.bridge = ConvBlock(128, 256, pool=False)
        
        # Decoder
        self.up1 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.dec1 = ConvBlock(256, 128, pool=False)
        
        self.up2 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.dec2 = ConvBlock(128, 64, pool=False)
        
        self.up3 = nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2)
        self.dec3 = ConvBlock(64, 32, pool=False)
        
        self.up4 = nn.ConvTranspose2d(32, 16, kernel_size=2, stride=2)
        self.dec4 = ConvBlock(32, 16, pool=False)
        
        # Output
        self.out_conv = nn.Conv2d(16, num_classes, 1)
        self.softmax = nn.Softmax(dim=1)
    
    def forward(self, x):
        # Encoder
        g1 = self.gdn(x)
        x1, p1 = self.enc1(g1)
        x2, p2 = self.enc2(p1)
        x3, p3 = self.enc3(p2)
        x4, p4 = self.enc4(p3)
        
        # Bridge
        b1 = self.bridge(p4)
        
        # Decoder
        u1 = self.up1(b1)
        c1 = torch.cat([u1, x4], dim=1)
        x5 = self.dec1(c1)
        
        u2 = self.up2(x5)
        c2 = torch.cat([u2, x3], dim=1)
        x6 = self.dec2(c2)
        
        u3 = self.up3(x6)
        c3 = torch.cat([u3, x2], dim=1)
        x7 = self.dec3(c3)
        
        u4 = self.up4(x7)
        c4 = torch.cat([u4, x1], dim=1)
        x8 = self.dec4(c4)
        
        # Output
        out = self.out_conv(x8)
        return self.softmax(out)

# ========== METRICS ==========
def iou_metrics(y_true, y_pred, num_classes=NUM_CLASSES):
    """Calculate mean IoU"""
    ious = []
    batch_size = y_true.shape[0]
    
    for i in range(batch_size):
        # Flatten predictions and ground truth
        pred_flat = y_pred[i].reshape(-1).cpu().numpy()
        true_flat = torch.argmax(y_true[i], dim=0).reshape(-1).cpu().numpy()
        
        # Calculate IoU for each class
        iou = jaccard_score(true_flat, pred_flat, 
                           average='macro', 
                           labels=np.arange(num_classes),
                           zero_division=0)
        ious.append(iou)
    
    return np.mean(ious)

def color_to_one_hot_mask(mask, colors, height=IMG_HEIGHT, width=IMG_WIDTH):
    """Convert class indices to colored mask"""
    color_mask = np.zeros((height, width, 3), dtype=np.uint8)
    mask_np = mask.cpu().numpy() if torch.is_tensor(mask) else mask
    
    for c in range(len(colors)):
        color_true = mask_np == c
        for i in range(3):
            color_mask[:, :, i] += color_true * colors[c][i]
    
    return color_mask

# ========== TRAINING FUNCTION ==========
def train_model(seed, save_dir):
    print(f'\n{"="*50}')
    print(f'STARTING TRAINING WITH SEED {seed}')
    print(f'{"="*50}')
    
    # Set seed
    set_seed(seed)
    
    # Create save directory
    os.makedirs(save_dir, exist_ok=True)
    
    # Get file lists
    train_images_names = sorted([f for f in os.listdir(train_images_folder_path) if f.endswith('.png')])
    train_mask_names = sorted([f for f in os.listdir(train_mask_folder_path) if f.endswith('.png')])
    test_images_names = sorted([f for f in os.listdir(test_images_folder_path) if f.endswith('.png')])
    test_mask_names = sorted([f for f in os.listdir(test_mask_folder_path) if f.endswith('.png')])
    
    # Split into train/val (300 for validation)
    total_train = len(train_images_names)
    val_indices = random.sample(range(total_train), 300)
    train_indices = [i for i in range(total_train) if i not in val_indices]
    
    train_img_names = [train_images_names[i] for i in train_indices]
    train_msk_names = [train_mask_names[i] for i in train_indices]
    val_img_names = [train_images_names[i] for i in val_indices]
    val_msk_names = [train_mask_names[i] for i in val_indices]
    
    print(f'Train samples: {len(train_img_names)}')
    print(f'Val samples: {len(val_img_names)}')
    print(f'Test samples: {len(test_images_names)}')
    
    # Create datasets
    train_dataset = CityscapesDataset(
        train_images_folder_path, train_mask_folder_path,
        train_img_names, train_msk_names
    )
    
    val_dataset = CityscapesDataset(
        train_images_folder_path, train_mask_folder_path,
        val_img_names, val_msk_names
    )
    
    test_dataset = CityscapesDataset(
        test_images_folder_path, test_mask_folder_path,
        test_images_names, test_mask_names, is_train=False
    )
    
    # Create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
    
    # Initialize model
    model = UNetGDN(in_channels=3, num_classes=NUM_CLASSES).to(device)
    print(f'\nModel architecture:')
    print(model)
    
    # Loss and optimizer
    criterion = nn.L1Loss()  # MAE loss
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=15, verbose=True)
    
    # Training variables
    best_iou = 0.0
    history = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': [], 'val_iou': []}
    
    # Training loop
    epochs = 300
    for epoch in range(epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0
        
        pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs} [Train]')
        for images, masks in pbar:
            images, masks = images.to(device), masks.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            
            loss = criterion(outputs, masks)
            loss.backward()
            optimizer.step()
            
            # Calculate accuracy
            preds = torch.argmax(outputs, dim=1)
            true_labels = torch.argmax(masks, dim=1)
            correct = (preds == true_labels).sum().item()
            
            train_loss += loss.item() * images.size(0)
            train_correct += correct
            train_total += true_labels.numel()
            
            pbar.set_postfix({'Loss': loss.item(), 'Acc': correct/true_labels.numel()})
        
        train_loss = train_loss / len(train_loader.dataset)
        train_acc = train_correct / train_total
        
        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        all_preds = []
        all_masks = []
        
        with torch.no_grad():
            for images, masks in val_loader:
                images, masks = images.to(device), masks.to(device)
                outputs = model(images)
                
                loss = criterion(outputs, masks)
                val_loss += loss.item() * images.size(0)
                
                preds = torch.argmax(outputs, dim=1)
                true_labels = torch.argmax(masks, dim=1)
                correct = (preds == true_labels).sum().item()
                
                val_correct += correct
                val_total += true_labels.numel()
                
                all_preds.append(preds.cpu())
                all_masks.append(masks.cpu())
        
        val_loss = val_loss / len(val_loader.dataset)
        val_acc = val_correct / val_total
        
        # Calculate IoU
        all_preds = torch.cat(all_preds, dim=0)
        all_masks = torch.cat(all_masks, dim=0)
        val_iou = iou_metrics(all_masks, all_preds)
        
        # Update learning rate
        scheduler.step(val_iou)
        
        # Save best model
        if val_iou > best_iou:
            best_iou = val_iou
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_iou': val_iou,
                'val_loss': val_loss,
            }, os.path.join(save_dir, 'best_model.pth'))
        
        # Update history
        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['train_acc'].append(train_acc)
        history['val_acc'].append(val_acc)
        history['val_iou'].append(val_iou)
        
        print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, '
              f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val IoU: {val_iou:.4f}')
        
        # Save checkpoint every 50 epochs
        if (epoch + 1) % 50 == 0:
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'history': history,
            }, os.path.join(save_dir, f'checkpoint_epoch_{epoch+1}.pth'))
    
    # Load best model for testing
    checkpoint = torch.load(os.path.join(save_dir, 'best_model.pth'))
    model.load_state_dict(checkpoint['model_state_dict'])
    
    # Test phase
    model.eval()
    test_correct = 0
    test_total = 0
    test_preds = []
    test_masks = []
    
    with torch.no_grad():
        for images, masks in test_loader:
            images, masks = images.to(device), masks.to(device)
            outputs = model(images)
            
            preds = torch.argmax(outputs, dim=1)
            true_labels = torch.argmax(masks, dim=1)
            correct = (preds == true_labels).sum().item()
            
            test_correct += correct
            test_total += true_labels.numel()
            
            test_preds.append(preds.cpu())
            test_masks.append(masks.cpu())
    
    test_acc = test_correct / test_total
    test_preds = torch.cat(test_preds, dim=0)
    test_masks = torch.cat(test_masks, dim=0)
    test_iou = iou_metrics(test_masks, test_preds)
    
    print(f'\nTest Results:')
    print(f'Test Accuracy: {test_acc:.4f}')
    print(f'Test IoU: {test_iou:.4f}')
    print(f'Best Validation IoU: {best_iou:.4f}')
    
    # Save history
    np.save(os.path.join(save_dir, 'history.npy'), history)
    
    # Plot training curves
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    
    axes[0, 0].plot(history['train_loss'], label='Train')
    axes[0, 0].plot(history['val_loss'], label='Validation')
    axes[0, 0].set_title('Model Loss')
    axes[0, 0].set_xlabel('Epoch')
    axes[0, 0].set_ylabel('Loss')
    axes[0, 0].legend()
    axes[0, 0].grid(True)
    
    axes[0, 1].plot(history['train_acc'], label='Train')
    axes[0, 1].plot(history['val_acc'], label='Validation')
    axes[0, 1].set_title('Model Accuracy')
    axes[0, 1].set_xlabel('Epoch')
    axes[0, 1].set_ylabel('Accuracy')
    axes[0, 1].legend()
    axes[0, 1].grid(True)
    
    axes[1, 0].plot(history['val_iou'])
    axes[1, 0].set_title('Validation IoU')
    axes[1, 0].set_xlabel('Epoch')
    axes[1, 0].set_ylabel('IoU')
    axes[1, 0].grid(True)
    
    # Visualize predictions
    axes[1, 1].axis('off')
    axes[1, 1].text(0.5, 0.5, f'Best Val IoU: {best_iou:.4f}\nTest IoU: {test_iou:.4f}\nTest Acc: {test_acc:.4f}',
                   ha='center', va='center', fontsize=12, bbox=dict(boxstyle="round", fc="w"))
    
    plt.tight_layout()
    plt.savefig(os.path.join(save_dir, 'training_curves.png'), dpi=150, bbox_inches='tight')
    plt.close()
    
    # Generate prediction visualization
    visualize_predictions(model, test_dataset, colors, save_dir)
    
    return model, history, test_iou, test_acc

def visualize_predictions(model, dataset, colors, save_dir, num_samples=5):
    """Visualize model predictions"""
    model.eval()
    indices = random.sample(range(len(dataset)), num_samples)
    
    fig, axes = plt.subplots(num_samples, 3, figsize=(15, num_samples*4))
    
    for i, idx in enumerate(indices):
        image, mask = dataset[idx]
        
        with torch.no_grad():
            output = model(image.unsqueeze(0).to(device))
            pred = torch.argmax(output, dim=1).squeeze(0).cpu()
        
        # Original image
        axes[i, 0].imshow(image.permute(1, 2, 0).numpy())
        axes[i, 0].set_title('Original Image')
        axes[i, 0].axis('off')
        
        # Ground truth mask
        true_mask = torch.argmax(mask, dim=0)
        colored_true = color_to_one_hot_mask(true_mask, colors)
        axes[i, 1].imshow(colored_true)
        axes[i, 1].set_title('Ground Truth')
        axes[i, 1].axis('off')
        
        # Predicted mask
        colored_pred = color_to_one_hot_mask(pred, colors)
        axes[i, 2].imshow(colored_pred)
        axes[i, 2].set_title('Prediction')
        axes[i, 2].axis('off')
    
    plt.tight_layout()
    plt.savefig(os.path.join(save_dir, 'predictions.png'), dpi=150, bbox_inches='tight')
    plt.close()

# ========== MAIN EXECUTION ==========
if __name__ == "__main__":
    seeds = [0]  # You can add more seeds: [0, 11, 25, 333, 41, 55, 666, 70, 8, 123]
    
    results = []
    for i, seed in enumerate(seeds):
        save_dir = f'./Good_train/1_gdn/Train_{i}'
        model, history, test_iou, test_acc = train_model(seed, save_dir)
        
        results.append({
            'seed': seed,
            'test_iou': test_iou,
            'test_acc': test_acc,
            'best_val_iou': max(history['val_iou'])
        })
    
    # Print summary
    print('\n' + '='*50)
    print('TRAINING SUMMARY')
    print('='*50)
    for res in results:
        print(f"Seed {res['seed']}: Test IoU: {res['test_iou']:.4f}, "
              f"Test Acc: {res['test_acc']:.4f}, Best Val IoU: {res['best_val_iou']:.4f}")

Using device: cpu

STARTING TRAINING WITH SEED 0


FileNotFoundError: [WinError 3] The system cannot find the path specified: '/kaggle/input/cityscapes/train/img'