# Semantic Segmentation

#### 1) Here we will walk through how to train your own UNet model with good real-time accuracy

In [None]:
import os
import torch
import tqdm
import numpy as np
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, random_split
from torchvision.utils import save_image
from utils import PairedImageDataset, UNet, class_indices_to_rgb, save_preds

# Seed PyTorch's global RNG so the random split and loader shuffling are repeatable.
# The value returned by torch.manual_seed is kept in `check`; note that `check` is
# rebound to a CUDA-availability boolean in the device-selection cell further down.
check = torch.manual_seed(0)

### User Variables

In [None]:
# Placeholders: every value in this cell must be filled in before the notebook can run.
# The first five names below are passed, in this order (batch_size excluded), to PairedImageDataset.
input_dir = r''  # presumably the folder holding the input images — confirm against utils.PairedImageDataset
output_dir = r''  # presumably the folder holding the matching target/mask images — confirm against utils.PairedImageDataset
# NOTE(review): the original notes said height is "of Input image" and width is
# "of Output image"; presumably both describe the size images are loaded at — confirm in utils.
height = 0 # image height handed to PairedImageDataset
width = 0 # image width handed to PairedImageDataset
batch_size = 0  # batch size used by the train, validation and test DataLoaders
file_extension = ''  # forwarded to PairedImageDataset; expected format (e.g. leading dot or not) is not visible here

# TRAINING: fractions of the dataset assigned to each split.

train_split = 0  # fraction of samples used for training
val_split = 0  # fraction of samples used for validation
test_split = 0  # fraction of samples used for testing

# The three fractions must add up to 1 (within 1e-6). With the placeholder zeros
# this assertion fails on purpose until real values are entered.
assert abs((train_split + val_split + test_split) - 1.0) < 1e-6, "splits must sum to 1"

In [None]:
# Build the paired dataset from the folders and image settings chosen above.
dataset = PairedImageDataset(
    input_dir,
    output_dir,
    height,
    width,
    file_extension,
)

In [None]:
# Convert the fractional splits into absolute sample counts.
dataset_size = len(dataset)

# Training and validation counts are floored; the generator keeps its loop
# variable out of the notebook namespace.
train_size, val_size = (
    int(dataset_size * fraction) for fraction in (train_split, val_split)
)
# Everything left over after flooring goes to the test split, so no sample is dropped.
test_size = dataset_size - (train_size + val_size)

print(f"Full dataset has {dataset_size} samples.")
print(f"Splitting into {train_size} training samples, {val_size} validation samples, and {test_size} testing samples.")

In [None]:
# Split with the absolute sizes computed (and printed) in the previous cell rather
# than the raw fractions: this guarantees the actual split matches the printed
# numbers, works on PyTorch versions that only accept integer lengths, and avoids
# random_split rejecting fractions whose floating-point sum lands just above 1.
train_dataset, val_dataset, test_dataset = random_split(
    dataset, [train_size, val_size, test_size]
)

# Only the training data needs reshuffling every epoch. Validation and test
# batches are iterated in a fixed order so that evaluation and the saved
# prediction images are reproducible from run to run.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

### Training Loop

In [None]:
# Pick the compute device. `check` records whether CUDA is in use so later cells
# know when calling torch.cuda.empty_cache() makes sense.
check = torch.cuda.is_available()
device = torch.device('cuda' if check else 'cpu')

# The network is built with as many channels as the dataset reports masks
# (presumably one per segmentation class — confirm in utils).
num_channels = dataset.num_masks
model = UNet(num_channels=num_channels).to(device)

# Cross-entropy loss, optimised with Adam at a starting learning rate of 1e-3.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Halve the learning rate (factor=0.5) once the monitored value — the average
# validation loss passed to scheduler.step() in the training loop — has stopped
# decreasing for more than `patience` epochs, never going below min_lr.
#
# The `verbose` argument is intentionally not passed: it is deprecated in
# PyTorch's schedulers and newer releases reject it. The current
# learning rate is recorded every epoch in `learning_rates` instead.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=5,
    min_lr=1e-6,
)

In [None]:
num_epochs = 0 # Adjust

# Per-epoch history, filled by the training loop and plotted afterwards.
train_losses = []
val_losses = []
learning_rates = []

# Lowest average validation loss seen so far; drives best-model checkpointing.
best_loss = float('inf')

# Number of batches per loader, used to turn summed batch losses into averages.
train_loader_size = len(train_loader)
val_loader_size = len(val_loader)
test_loader_size = len(test_loader)

# f-string so the selected device is actually interpolated into the message.
print(f"Training on {device}")

In [None]:
def _batch_loss(inputs, targets):
    """Run one batch through the model and return its loss tensor.

    Moves both tensors to ``device``, converts the targets into the layout the
    criterion is given (a singleton channel axis is dropped so (N, 1, H, W)
    becomes (N, H, W), then cast to long), runs the forward pass, and, if the
    prediction's spatial size differs from the targets', bilinearly resizes the
    prediction to match before applying ``criterion``.

    Gradient tracking is controlled by the caller (e.g. ``torch.no_grad()``).
    """
    inputs, targets = inputs.to(device), targets.to(device)

    # squeeze() is not in-place: the result must be assigned back, otherwise the
    # channel axis survives and the shape comparison below is always unequal.
    if targets.dim() == 4 and targets.shape[1] == 1:
        targets = targets.squeeze(1)
    targets = targets.long()

    outputs = model(inputs)

    if outputs.shape[2:] != targets.shape[1:]:
        outputs = nn.functional.interpolate(
            outputs,
            size=(targets.shape[1], targets.shape[2]),
            mode='bilinear',
            align_corners=False
        )

    return criterion(outputs, targets)


for epoch in range(num_epochs):
    # ---- Training pass ----
    model.train()
    train_loss = 0.0

    # Only the `tqdm` module is imported, so the progress-bar class is tqdm.tqdm.
    for inputs, targets in tqdm.tqdm(train_loader,desc=f'Epoch: {epoch + 1}/{num_epochs} - Training'):
        optimizer.zero_grad()
        loss = _batch_loss(inputs, targets)

        loss.backward()
        # Cap the global gradient norm at 1.0 before the optimizer step.
        nn.utils.clip_grad_norm_(model.parameters(),max_norm=1.0)
        optimizer.step()

        train_loss += loss.item()

    # ---- Validation pass (no gradients) ----
    model.eval()
    val_loss = 0.0

    with torch.no_grad():
        for inputs, targets in tqdm.tqdm(val_loader,desc=f'Epoch: {epoch + 1}/{num_epochs} - Validation'):
            val_loss += _batch_loss(inputs, targets).item()

    # Average the summed batch losses over the number of batches.
    avg_train_loss = float(train_loss/train_loader_size)
    avg_val_loss = float(val_loss/val_loader_size)

    # The plateau scheduler watches the validation loss; log the resulting LR.
    scheduler.step(avg_val_loss)
    learning_rates.append(optimizer.param_groups[0]['lr'])

    train_losses.append(avg_train_loss)
    # Record the validation *loss* (not the number of validation batches) so the
    # loss plot shows a real curve.
    val_losses.append(avg_val_loss)

    print(f"Epoch {epoch+1}: Train Loss = {avg_train_loss:.4f}, Val Loss = {avg_val_loss:.4f}")

    # Checkpoint whenever the validation loss improves.
    if avg_val_loss < best_loss:
        best_loss = avg_val_loss
        torch.save(model.state_dict(),'best_model.pth')
        print(f"✓ Best model saved with loss: {best_loss:.4f}")
        if check:
            torch.cuda.empty_cache()

In [None]:
# Save final model
torch.save(model.state_dict(), 'final_model.pth')
print("Training completed!")

In [None]:
# Plot results
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# Loss plot
ax1.plot(train_losses, label='Training Loss')
ax1.plot(val_losses, label='Validation Loss')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax1.set_title('Training Progress')
ax1.legend()
ax1.grid(True)

# Learning rate plot
ax2.plot(learning_rates, label='Learning Rate', color='green')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Learning Rate')
ax2.set_title('Learning Rate Schedule')
ax2.legend()
ax2.grid(True)
ax2.set_yscale('log')  # Log scale for better visualization

plt.tight_layout()
plt.show()

if learning_rates:
    print(f"Final learning rate: {learning_rates[-1]:.6f}")
else:
    print("No learning rate data recorded")

### Evaluation

In [None]:
# Evaluate the model on the held-out test split and report the average loss.
model.eval()

test_loss = 0.0
with torch.no_grad():
    # Only the `tqdm` module is imported, so the progress-bar class is tqdm.tqdm.
    for inputs, targets in tqdm.tqdm(test_loader,desc="Testing"):
        # Move the batch to the model's device (Tensor.to(other_tensor) would
        # merely copy that tensor's own dtype/device, i.e. do nothing here).
        inputs, targets = inputs.to(device), targets.to(device)

        # Same target preparation as in training: drop a singleton channel axis
        # so (N, 1, H, W) becomes (N, H, W), then cast to long for the criterion.
        if targets.dim() == 4 and targets.shape[1] == 1:
            targets = targets.squeeze(1)
        targets = targets.long()

        outputs = model(inputs)

        # Resize predictions when their spatial size differs from the targets'.
        if outputs.shape[2:] != targets.shape[1:]:
            outputs = nn.functional.interpolate(
                outputs,
                size=(targets.shape[1], targets.shape[2]),
                mode='bilinear',
                align_corners=False
            )

        loss = criterion(outputs, targets)
        # Accumulate a plain Python float rather than a tensor.
        test_loss += loss.item()

# Average the summed batch losses over the number of test batches.
avg_test_loss = float(test_loss/test_loader_size)

print(f"Test Loss = {avg_test_loss:.4f}")

if check:
    torch.cuda.empty_cache()

### Saving Prediction

We are going to save comparison images like the one shown below, in which the **Original, Target and Predicted** images are shown side by side.

![Alt Text](analysis/comparison_7.png)

Provide **three separate directories** in which the predictions for all images in the **train, val and test loaders** will be saved.

In [None]:
# Placeholders: destination folders for the comparison images of each split.
train_output_dir = r''
val_output_dir = r''
test_output_dir = r''

# Create each folder if it does not exist yet; existing folders are left alone.
for out_dir in (train_output_dir, val_output_dir, test_output_dir):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# Save predictions for every split, each into its own folder, in the order
# train -> validation -> test.
for loader, out_dir in (
    (train_loader, train_output_dir),
    (val_loader, val_output_dir),
    (test_loader, test_output_dir),
):
    save_preds(model, loader, device, out_dir, dataset.color_to_class)