In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input tree and echo the full path of every file found
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Importing the Required Dependencies 

In [None]:
import os
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt

In [None]:
# Select the compute device: CUDA when it is usable, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

## Defining the Data Directories

In [None]:
# Paths to the competition data splits.
# DepthEstimationDataset (defined below) reads RGB inputs from an `images`
# sub-folder of each directory and, when present, targets from a `depths`
# sub-folder.
TRAIN_DIR = "/kaggle/input/depth-estimation/competition-data/competition-data/training"
VAL_DIR = "/kaggle/input/depth-estimation/competition-data/competition-data/validation"
TEST_DIR = "/kaggle/input/depth-estimation/competition-data/competition-data/testing"

In [None]:
# Function to convert images to CSV 
def images_to_csv_with_metadata(image_folder, output_csv, size=(128, 128)):
    """Flatten every ``.png`` image in a folder into one CSV row of 8-bit pixels.

    Each image is read unchanged, resized to ``size``, min-max rescaled to the
    0-255 range, converted to ``uint8`` and flattened. The resulting CSV has
    the columns ``id``, ``ImageID`` (the filename) and one integer-named column
    per pixel value. An empty folder produces a CSV with only the two metadata
    columns.

    Args:
        image_folder: Directory containing the ``.png`` files to convert.
        output_csv: Destination path of the CSV file.
        size: Target size handed to ``cv2.resize``; defaults to ``(128, 128)``.

    Raises:
        ValueError: If a ``.png`` file cannot be decoded by OpenCV.
    """
    rows = []

    # The id is the position in the sorted directory listing, so non-PNG
    # entries still consume an index (same numbering as before).
    for idx, filename in enumerate(sorted(os.listdir(image_folder))):
        if not filename.endswith(".png"):
            continue

        filepath = os.path.join(image_folder, filename)

        # cv2.imread signals failure by returning None instead of raising;
        # fail loudly here rather than with an opaque error inside cv2.resize.
        image = cv2.imread(filepath, cv2.IMREAD_UNCHANGED)
        if image is None:
            raise ValueError(f"Could not read image file: {filepath}")

        image = cv2.resize(image, size)
        image = image / 255.
        # Min-max rescale; the epsilon avoids division by zero on flat images
        image = (image - np.min(image)) / (np.max(image) - np.min(image) + 1e-6)
        image = np.uint8(image * 255.)

        rows.append([idx, filename] + image.flatten().tolist())

    # Pixel columns are named 0..N-1; N is taken from the first row
    num_columns = len(rows[0]) - 2 if rows else 0
    column_names = ["id", "ImageID"] + list(range(num_columns))
    df = pd.DataFrame(rows, columns=column_names)

    df.to_csv(output_csv, index=False)

## Custom Dataset Creation

In [None]:
# Custom Dataset Class
class DepthEstimationDataset(Dataset):
    """Dataset of RGB images with optional per-image depth maps.

    RGB files (``.jpg`` / ``.png``) are read from ``<root_dir>/images``. When
    ``<root_dir>`` contains an entry named ``depths``, the depth map for an
    image is looked up there under the same base name with a ``.png``,
    ``.jpg`` or ``.npy`` extension (first match wins).

    Args:
        root_dir: Directory holding the ``images`` (and optionally ``depths``)
            sub-folder.
        transform: Callable applied to the PIL RGB image only. Defaults to
            ``ToTensor`` followed by ``Normalize`` with the fixed mean/std
            given below.

    Raises:
        ValueError: If no ``.jpg`` / ``.png`` file is found in the images folder.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.rgb_dir = os.path.join(root_dir, 'images')
        self.depth_dir = os.path.join(root_dir, 'depths') if 'depths' in os.listdir(root_dir) else None

        # os.listdir returns entries in arbitrary order; sort so that sample
        # indices are stable across runs and machines.
        self.rgb_files = sorted(
            f for f in os.listdir(self.rgb_dir)
            if os.path.isfile(os.path.join(self.rgb_dir, f))
            and f.endswith(('.jpg', '.png'))
        )

        if len(self.rgb_files) == 0:
            raise ValueError(f"No image files found in {self.rgb_dir}")

        self.transform = transform or transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def __len__(self):
        """Return the number of RGB images found."""
        return len(self.rgb_files)

    def _load_depth(self, img_name):
        """Return the depth tensor matching ``img_name``, or None if unavailable."""
        if not self.depth_dir:
            return None

        # Depth maps share the image's base name but may use another extension
        depth_name = os.path.splitext(img_name)[0]
        for extension in ('.png', '.jpg', '.npy'):
            depth_path = os.path.join(self.depth_dir, f"{depth_name}{extension}")
            if not os.path.exists(depth_path):
                continue

            if extension == '.npy':
                depth_map = torch.from_numpy(np.load(depth_path)).float()
                # Image-based maps come out of ToTensor as (1, H, W); give a
                # 2-D array the same leading channel axis so every sample has
                # a consistent layout for batching and the loss.
                if depth_map.dim() == 2:
                    depth_map = depth_map.unsqueeze(0)
                return depth_map

            depth_img = Image.open(depth_path).convert('L')  # Grayscale
            return transforms.ToTensor()(depth_img)

        return None

    def __getitem__(self, idx):
        """Return a dict with keys ``image``, ``depth`` and ``filename``.

        ``depth`` is None when the dataset has no depth folder or no depth
        file matches the image.
        """
        img_name = self.rgb_files[idx]
        img_path = os.path.join(self.rgb_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        # The transform touches the RGB image only, never the depth map
        if self.transform:
            image = self.transform(image)

        return {
            'image': image,
            'depth': self._load_depth(img_name),
            'filename': img_name
        }

In [None]:
# Data Augmentation transforms
def get_training_augmentation():
    """Build the image-only augmentation pipeline used for training.

    Only photometric augmentation (colour jitter) is applied. This transform
    is run on the RGB image alone, so a geometric augmentation such as a
    random horizontal flip would mirror the input without mirroring its depth
    target and corrupt the supervision signal; it is therefore not included.

    Returns:
        A ``transforms.Compose`` that jitters colour, converts to a tensor and
        normalizes with the fixed mean/std below.
    """
    transform = transforms.Compose([
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    return transform

## Defining the Model

In [None]:
# U-Net Model 
class DoubleConv(nn.Module):
    """Two stacked 3x3 convolutions, each followed by batch norm and ReLU."""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        # Without a (truthy) intermediate width, reuse the output width
        hidden = mid_channels or out_channels
        stages = [
            nn.Conv2d(in_channels, hidden, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(hidden),
            nn.ReLU(inplace=True),
            nn.Conv2d(hidden, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]
        self.double_conv = nn.Sequential(*stages)

    def forward(self, x):
        """Run the input through both conv/norm/activation stages."""
        result = self.double_conv(x)
        return result

class Down(nn.Module):
    """Encoder stage: 2x2 max pooling followed by a DoubleConv block."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        pooling = nn.MaxPool2d(2)
        convolutions = DoubleConv(in_channels, out_channels)
        self.maxpool_conv = nn.Sequential(pooling, convolutions)

    def forward(self, x):
        """Pool the input, then convolve it."""
        pooled_features = self.maxpool_conv(x)
        return pooled_features

class Up(nn.Module):
    """Decoder stage: upsample, pad to the skip tensor's size, concatenate, convolve."""

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        if not bilinear:
            # Learned upsampling halves the channel count itself
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)
        else:
            # Bilinear upsampling keeps the channel count, so the conv block
            # narrows through in_channels // 2 instead
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)

    def forward(self, x1, x2):
        """Upsample ``x1``, align it with skip tensor ``x2`` and fuse the two."""
        upsampled = self.up(x1)

        # Size gap between the skip connection and the upsampled map
        gap_h = x2.size(2) - upsampled.size(2)
        gap_w = x2.size(3) - upsampled.size(3)

        # Split each gap across both sides; the extra pixel goes right/bottom
        left, top = gap_w // 2, gap_h // 2
        padding = [left, gap_w - left, top, gap_h - top]
        upsampled = F.pad(upsampled, padding)

        fused = torch.cat([x2, upsampled], dim=1)
        return self.conv(fused)

class OutConv(nn.Module):
    """Final 1x1 convolution mapping feature channels to output channels."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        """Project the feature map to the requested number of channels."""
        projected = self.conv(x)
        return projected

class UNet(nn.Module):
    """U-Net with four downsampling and four upsampling stages joined by skip connections.

    Args:
        n_channels: Number of input channels.
        n_classes: Number of output channels produced by the final 1x1 conv.
        bilinear: Use bilinear upsampling in the decoder when true, transposed
            convolutions otherwise.
        features: Channel widths of the first four encoder levels.
    """

    def __init__(self, n_channels=3, n_classes=1, bilinear=True, features=[64, 128, 256, 512]):
        super().__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        width1, width2, width3, width4 = features[0], features[1], features[2], features[3]
        # With bilinear upsampling the decoder halves channel widths itself
        factor = 1 if not bilinear else 2

        # Encoder path
        self.inc = DoubleConv(n_channels, width1)
        self.down1 = Down(width1, width2)
        self.down2 = Down(width2, width3)
        self.down3 = Down(width3, width4)
        self.down4 = Down(width4, width4 * 2 // factor)

        # Decoder path
        self.up1 = Up(width4 * 2, width4 // factor, bilinear)
        self.up2 = Up(width4, width3 // factor, bilinear)
        self.up3 = Up(width3, width2 // factor, bilinear)
        self.up4 = Up(width2, width1, bilinear)
        self.outc = OutConv(width1, n_classes)

    def forward(self, x):
        """Encode the input, decode with skip connections and return the raw output map."""
        # Encoder: keep every level's activation for the skip connections
        skip1 = self.inc(x)
        skip2 = self.down1(skip1)
        skip3 = self.down2(skip2)
        skip4 = self.down3(skip3)
        bottleneck = self.down4(skip4)

        # Decoder: fuse with the matching encoder level, deepest first
        decoded = self.up1(bottleneck, skip4)
        decoded = self.up2(decoded, skip3)
        decoded = self.up3(decoded, skip2)
        decoded = self.up4(decoded, skip1)

        return self.outc(decoded)

## Creating Loss functions

In [None]:
# Custom Loss Functions
class GradientLoss(nn.Module):
    """L1 distance between the Sobel gradients of a prediction and its target.

    The two 3x3 Sobel kernels are registered as non-persistent buffers: they
    follow ``module.to(device)`` like any other module state but are kept out
    of ``state_dict``. Inputs are ``(N, C, H, W)`` tensors; each channel is
    filtered independently, so any channel count is supported.
    """

    def __init__(self):
        super(GradientLoss, self).__init__()
        sobel_x = torch.tensor([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], dtype=torch.float32)
        sobel_y = torch.tensor([[-1, -2, -1], [0, 0, 0], [1, 2, 1]], dtype=torch.float32)
        self.register_buffer('sobel_x', sobel_x.reshape(1, 1, 3, 3), persistent=False)
        self.register_buffer('sobel_y', sobel_y.reshape(1, 1, 3, 3), persistent=False)

    def _sobel_gradients(self, x):
        """Return the horizontal and vertical Sobel responses of ``x``."""
        channels = x.shape[1]
        # One copy of the kernel per channel + groups=channels filters every
        # channel on its own; the kernel dtype follows the input.
        kernel_x = self.sobel_x.to(dtype=x.dtype).repeat(channels, 1, 1, 1)
        kernel_y = self.sobel_y.to(dtype=x.dtype).repeat(channels, 1, 1, 1)
        grad_x = F.conv2d(x, kernel_x, padding=1, groups=channels)
        grad_y = F.conv2d(x, kernel_y, padding=1, groups=channels)
        return grad_x, grad_y

    def forward(self, pred, target):
        """Return the sum of the L1 losses on the x- and y-gradients."""
        # The loss module may never have been moved explicitly; relocate the
        # kernels once so later calls on the same device do no extra copies.
        if self.sobel_x.device != pred.device:
            self.sobel_x = self.sobel_x.to(pred.device)
            self.sobel_y = self.sobel_y.to(pred.device)

        pred_grad_x, pred_grad_y = self._sobel_gradients(pred)
        target_grad_x, target_grad_y = self._sobel_gradients(target)

        loss_grad_x = F.l1_loss(pred_grad_x, target_grad_x)
        loss_grad_y = F.l1_loss(pred_grad_y, target_grad_y)

        return loss_grad_x + loss_grad_y

class DepthLoss(nn.Module):
    """Weighted sum of L1, MSE and gradient losses (weights 0.5, 0.3 and 0.2)."""

    def __init__(self):
        super().__init__()
        self.l1_loss = nn.L1Loss()
        self.mse_loss = nn.MSELoss()
        self.gradient_loss = GradientLoss()

    def forward(self, pred, target):
        """Return the combined loss for a prediction/target pair."""
        weighted_l1 = self.l1_loss(pred, target) * 0.5
        weighted_mse = self.mse_loss(pred, target) * 0.3
        weighted_gradient = self.gradient_loss(pred, target) * 0.2

        total = weighted_l1 + weighted_mse
        total = total + weighted_gradient
        return total

In [None]:
# Custom collate function to handle None values
def custom_collate_fn(batch):
    """Collate sample dicts, substituting an all-zero depth map where 'depth' is None.

    The placeholder has shape ``(1, H, W)`` taken from the sample's image and
    is written back into the sample dict before the default collate runs.
    """

    def _ensure_depth(sample):
        if sample['depth'] is None:
            image = sample['image']
            placeholder_shape = (1, image.shape[1], image.shape[2])
            sample['depth'] = torch.zeros(placeholder_shape, dtype=torch.float32)
        return sample

    prepared = [_ensure_depth(sample) for sample in batch]
    return torch.utils.data.dataloader.default_collate(prepared)

## Loading the Dataset 

In [None]:
# Initialize datasets and dataloaders
def initialize_dataloaders(batch_size=8):
    """Create the train/validation/test datasets and their DataLoaders.

    Only the training set uses the augmentation pipeline and is shuffled; the
    validation and test sets use the dataset's default transform and keep
    their order.

    Returns:
        ``(train_loader, val_loader, test_loader,
        train_dataset, val_dataset, test_dataset)``
    """
    train_dataset = DepthEstimationDataset(TRAIN_DIR, transform=get_training_augmentation())
    val_dataset = DepthEstimationDataset(VAL_DIR)
    test_dataset = DepthEstimationDataset(TEST_DIR)

    # Settings shared by all three loaders; the collate function fills in
    # placeholder depth maps for samples that have none
    shared_options = dict(
        batch_size=batch_size,
        num_workers=4,
        pin_memory=True,
        collate_fn=custom_collate_fn,
    )

    train_loader = DataLoader(train_dataset, shuffle=True, **shared_options)
    val_loader = DataLoader(val_dataset, shuffle=False, **shared_options)
    test_loader = DataLoader(test_dataset, shuffle=False, **shared_options)

    loaders = (train_loader, val_loader, test_loader)
    datasets = (train_dataset, val_dataset, test_dataset)
    return loaders + datasets

## Training the Model

In [None]:
# Training function
def train_model(model, train_loader, val_loader, num_epochs=20, batch_size=32):
    """Train ``model`` with DepthLoss and keep the checkpoint with the best validation RMSE.

    Uses AdamW (lr=1e-3, weight_decay=1e-4) with a ReduceLROnPlateau scheduler
    driven by the validation RMSE. After each epoch the model is saved to
    ``best_unet_model.pth`` if its validation RMSE improved. Loss and RMSE
    curves are plotted once training finishes.

    Args:
        model: Network to train; expected to already live on the global ``device``.
        train_loader: Iterable of batches with ``'image'`` and ``'depth'`` tensors.
        val_loader: Same batch structure as ``train_loader``.
        num_epochs: Number of passes over the training data.
        batch_size: Unused; kept so existing callers keep working.

    Returns:
        The same ``model`` object, holding the weights of the last epoch (not
        necessarily those of the best checkpoint).

    Raises:
        ValueError: If either loader yields no batches.
    """
    # Fail before doing any work instead of dividing by zero after an epoch
    if len(train_loader) == 0 or len(val_loader) == 0:
        raise ValueError("train_loader and val_loader must each yield at least one batch")

    criterion = DepthLoss()
    optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
    # The `verbose` flag is deprecated in recent PyTorch releases; learning
    # rate changes are reported explicitly after each scheduler step instead.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=3
    )

    best_val_rmse = float('inf')
    train_losses = []
    val_losses = []
    train_rmses = []
    val_rmses = []

    print("Starting training...")
    for epoch in range(num_epochs):
        # ---- Training ----
        model.train()
        train_loss = 0.0
        train_rmse = 0.0

        for batch_idx, batch in enumerate(train_loader):
            images = batch['image'].to(device)
            depths = batch['depth'].to(device)

            optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, depths)

            # RMSE is for monitoring only, so keep it out of the autograd graph
            with torch.no_grad():
                rmse = torch.sqrt(F.mse_loss(outputs, depths))
                train_rmse += rmse.item()

            loss.backward()

            # Clip gradients to prevent explosion
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()

            train_loss += loss.item()

            # Report progress every 10 batches
            if (batch_idx + 1) % 10 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Batch [{batch_idx+1}/{len(train_loader)}], Loss: {loss.item():.4f}, RMSE: {rmse.item():.4f}')

        # ---- Validation ----
        model.eval()
        val_loss = 0.0
        val_rmse = 0.0

        with torch.no_grad():
            for batch in val_loader:
                images = batch['image'].to(device)
                depths = batch['depth'].to(device)

                outputs = model(images)
                loss = criterion(outputs, depths)
                rmse = torch.sqrt(F.mse_loss(outputs, depths))

                val_loss += loss.item()
                val_rmse += rmse.item()

        # Per-batch averages (the RMSE is the mean of the batch RMSEs)
        train_loss = train_loss / len(train_loader)
        train_rmse = train_rmse / len(train_loader)
        val_loss = val_loss / len(val_loader)
        val_rmse = val_rmse / len(val_loader)

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_rmses.append(train_rmse)
        val_rmses.append(val_rmse)

        # Step the scheduler and report any learning-rate reduction
        lr_before = optimizer.param_groups[0]['lr']
        scheduler.step(val_rmse)
        lr_after = optimizer.param_groups[0]['lr']
        if lr_after != lr_before:
            print(f"Learning rate reduced from {lr_before:.2e} to {lr_after:.2e}")

        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train RMSE: {train_rmse:.4f}, Val Loss: {val_loss:.4f}, Val RMSE: {val_rmse:.4f}')

        # Save best model
        if val_rmse < best_val_rmse:
            best_val_rmse = val_rmse
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_rmse': val_rmse,
            }, 'best_unet_model.pth')
            print(f"Saved best model with validation RMSE: {best_val_rmse:.4f}")

    # Plot training and validation metrics
    plot_metrics(train_losses, val_losses, train_rmses, val_rmses)

    return model

In [None]:
# Function to plot training metrics
def plot_metrics(train_losses, val_losses, train_rmses, val_rmses):
    """Plot loss and RMSE curves side by side, save them to training_metrics.png and show them.

    Each argument is a per-epoch sequence of values.
    """
    fig, (loss_ax, rmse_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: combined loss
    loss_ax.plot(train_losses, label='Train Loss')
    loss_ax.plot(val_losses, label='Val Loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()
    loss_ax.set_title('Loss vs. Epoch')

    # Right panel: RMSE
    rmse_ax.plot(train_rmses, label='Train RMSE')
    rmse_ax.plot(val_rmses, label='Val RMSE')
    rmse_ax.set_xlabel('Epoch')
    rmse_ax.set_ylabel('RMSE')
    rmse_ax.legend()
    rmse_ax.set_title('RMSE vs. Epoch')

    fig.tight_layout()
    fig.savefig('training_metrics.png')
    plt.show()

## Generate Predictions CSV

In [None]:
# Function to generate predictions
def generate_predictions(model, test_loader, device, output_folder="predictions"):
    """Generate depth predictions and save them as 8-bit PNG files.

    Every prediction is resized to 128x128, min-max rescaled to 0-255 and
    written to ``output_folder`` under the input image's base name with a
    ``.png`` extension. PNG is always used so the output is lossless and is
    picked up by the PNG-only CSV conversion step, whatever the input format.

    Args:
        model: Trained network; switched to eval mode here.
        test_loader: Iterable of batches with ``'image'`` and ``'filename'`` entries.
        device: Device the input images are moved to before inference.
        output_folder: Directory to write into; created if missing.

    Returns:
        ``output_folder``.

    Raises:
        IOError: If OpenCV fails to write a prediction file.
    """
    model.eval()

    os.makedirs(output_folder, exist_ok=True)

    print("Generating predictions...")
    with torch.no_grad():
        for batch in test_loader:
            images = batch['image'].to(device)
            outputs = model(images)

            for filename, output in zip(batch['filename'], outputs):
                pred = output.cpu().numpy().squeeze()

                # Resize to required size (128x128)
                pred_resized = cv2.resize(pred, (128, 128))

                # Min-max rescale; the epsilon avoids division by zero on
                # a constant prediction
                pred_min = np.min(pred_resized)
                pred_max = np.max(pred_resized)
                pred_norm = (pred_resized - pred_min) / (pred_max - pred_min + 1e-6)
                pred_uint8 = np.uint8(pred_norm * 255)

                stem = os.path.splitext(filename)[0]
                output_path = os.path.join(output_folder, f"{stem}.png")

                # cv2.imwrite reports failure through its return value
                if not cv2.imwrite(output_path, pred_uint8):
                    raise IOError(f"Failed to write prediction to {output_path}")

    print(f"Predictions saved to {output_folder}")
    return output_folder


In [None]:
# Main execution function
def main():
    """Run the full pipeline: load data, train, reload the best checkpoint, predict, export CSV."""
    batch_size = 32
    num_epochs = 20

    # Loaders come first in the returned tuple, datasets second
    pipeline_parts = initialize_dataloaders(batch_size)
    train_loader, val_loader, test_loader = pipeline_parts[:3]
    train_dataset, val_dataset, test_dataset = pipeline_parts[3:]

    # Basic dataset statistics
    print(f"Train dataset size: {len(train_dataset)}")
    print(f"Validation dataset size: {len(val_dataset)}")
    print(f"Test dataset size: {len(test_dataset)}")

    model = UNet(n_channels=3, n_classes=1, bilinear=True).to(device)
    print(model)

    # Training updates `model` in place and writes the best checkpoint to disk
    train_model(model, train_loader, val_loader, num_epochs=num_epochs, batch_size=batch_size)

    # Predict with the best checkpoint rather than the last-epoch weights
    checkpoint = torch.load('best_unet_model.pth')
    model.load_state_dict(checkpoint['model_state_dict'])
    print(f"Loaded best model from epoch {checkpoint['epoch']} with validation RMSE: {checkpoint['val_rmse']:.4f}")

    predictions_folder = generate_predictions(model, test_loader, device, "data/sample_solution")

    # Flatten the saved prediction images into the submission CSV
    predictions_csv = "predictions.csv"
    images_to_csv_with_metadata(predictions_folder, predictions_csv)
    print(f"Predictions saved to CSV: {predictions_csv}")

if __name__ == "__main__":
    main()