In [4]:
!pip install wandb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wandb
  Downloading wandb-0.13.10-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
Collecting setproctitle
  Downloading setproctitle-1.3.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (31 kB)
Collecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sentry-sdk>=1.0.0
  Downloading sentry_sdk-1.15.0-py2.py3-none-any.whl (181 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m181.3/181.3 KB[0m [31m25.5 MB/s[0m eta [36m0:00:00[0m
Collecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting GitPython>=1.0.0
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━

In [5]:
import os
from PIL import Image
import random
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.models.segmentation as segmentation
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.nn.functional as F
import numpy as np
from torch.utils.data.sampler import SubsetRandomSampler
import pickle
from sklearn import manifold
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report
import wandb
from sklearn.metrics import accuracy_score, f1_score

In [6]:
from google.colab import drive
drive.mount("/content/gdrive")
dir_path = "/content/gdrive/MyDrive/VOC Segmentation Dataset"

Mounted at /content/gdrive


In [7]:
# Create empty lists to hold the file names
image_files = []
mask_files = []

# Iterate over the files in the "images" folder and add the file names to the image_files list
for filename in os.listdir(os.path.join(dir_path, "images")):
    if filename.endswith(".jpg"):
        image_files.append(filename)

# Iterate over the files in the "masks" folder and add the file names to the mask_files list
for filename in os.listdir(os.path.join(dir_path, "masks")):
    if filename.endswith(".png"):
        mask_files.append(filename)

print(len(image_files))
print(len(mask_files))

1464
1464


In [8]:
num_samples = len(image_files)

## 2.1.a

In [9]:
# Split the data into training, validation, and testing sets
val_split = int(0.2 * num_samples)
test_split = int(0.1 * num_samples) + val_split

In [10]:
# validation data splitted to 20% of the dataset
images_val_data = image_files[:val_split]  
masks_val_data = mask_files[:val_split]

# testing data splitted to 10% of the dataset
images_test_data = image_files[val_split:test_split]
masks_test_data = mask_files[val_split:test_split]

# rest reserved for training data
images_train_data = image_files[test_split:]
masks_train_data = mask_files[test_split:]

## 2.1.b

In [11]:
import os
import random
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Set the seed for reproducibility
seed = 42
random.seed(seed)
torch.manual_seed(seed)

transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

# Create a custom dataset for image and mask representation of data in data segmentation
class vocDataset(Dataset):
    def __init__(self, im, ma, transform=None):
        self.transform = transform
        self.image_files = im
        self.mask_files = ma

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        image_file = self.image_files[idx]
        mask_file = self.mask_files[idx]
        image = Image.open(os.path.join("/content/gdrive/MyDrive/VOC Segmentation Dataset/images", image_file))
        mask = Image.open(os.path.join("/content/gdrive/MyDrive/VOC Segmentation Dataset/masks", mask_file))
        
        if self.transform:
            image = self.transform(image)
            mask = self.transform(mask)
        
        return image, mask


train_dataset = vocDataset(images_train_data, masks_train_data, transform)
val_dataset = vocDataset(images_val_data, masks_val_data, transform)
test_dataset = vocDataset(images_test_data, images_test_data, transform)

# Define data loaders for the splits
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=4)



In [12]:
import torch
import torch.nn as nn
import torchvision.models as models

class FCNResNet50(nn.Module):
    def __init__(self, num_classes):
        super(FCNResNet50, self).__init__()
        resnet = models.resnet50(pretrained=True)

        # Remove the final average pooling and fully connected layers from ResNet50
        self.features = nn.Sequential(*list(resnet.children())[:-2])

        # Add an upsampling layer to increase the spatial resolution of the feature maps
        self.upsample = nn.Upsample(scale_factor=32, mode='bilinear', align_corners=True)

        # Add a convolutional layer to map the feature maps to the desired number of classes
        self.classifier = nn.Conv2d(2048, num_classes, kernel_size=1)

    def forward(self, x):
        x = self.features(x)
        x = self.upsample(x)
        x = self.classifier(x)
        return x

In [13]:
model = FCNResNet50(num_classes=2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Forward pass
inputs = torch.randn(4, 3, 224, 224)
labels = torch.randint(low=0, high=2, size=(4, 224, 224))
outputs = model(inputs)

# Compute loss and update parameters
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [12]:
def trainerr(train_loader):
    train_loss = 0.0
    train_acc = 0.0
    num_batches = 0
    for images, labels in train_loader:
        # Forward pass
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        loss = criterion(outputs['out'][0], labels[0])

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Compute accuracy
        preds = (outputs['out'] > 0).long()
        _, predicted = torch.max(outputs['out'].data, 1)
        train_acc += (predicted == labels).sum().item()

        # Log the training loss and accuracy to wandb
        train_loss += (loss.item())
        num_batches += 1

    return [train_loss,train_acc,num_batches]



## Training fcn_resnet50 model

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models

wandb.init(project='train_fcnresnet', name='train_fcnresnet')

# Define hyperparameters
batch_size = 4
learning_rate = 0.001
num_epochs = 10
num_classes = 21

# Define model
model = models.segmentation.fcn_resnet50(pretrained=True)
model.eval()

for param in model.parameters():
    param.requires_grad = False

# change the classification FCNHead and make it learnable
model.classifier[4] = nn.Conv2d(512, 1, kernel_size=1)

# change the aux_classification FCNHead and make it learnable
model.aux_classifier[4] = nn.Conv2d(256, 1, kernel_size=1)

model.classifier[-1].requires_grad = True

model = model.to(device)

const = 84*256

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    model.train()
    # trainerr functions trains the model and returns the results
    vals = trainerr(train_loader) 

    # Compute average training loss and accuracy
    loss = vals[0] / (const*vals[2])
    accuracy = vals[1] / (const*vals[2])

    # Log the training loss and accuracy to wandb
    wandb.log({'epoch': epoch+1, 'train_loss': loss, 'train_acc': accuracy})
    print('Epoch', epoch+1, 'train_loss', loss, 'train_acc', accuracy)


ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Downloading: "https://download.pytorch.org/models/fcn_resnet50_coco-1167a1af.pth" to /root/.cache/torch/hub/checkpoints/fcn_resnet50_coco-1167a1af.pth


  0%|          | 0.00/135M [00:00<?, ?B/s]

Epoch 1 train_loss 0.004421701796897473 train_acc 33.908628346766726
Epoch 2 train_loss 0.004216018842099408 train_acc 33.9059945050491
Epoch 3 train_loss 0.0038886075826544348 train_acc 33.90925622741801
Epoch 4 train_loss 0.0041137081916912165 train_acc 33.91182420499815
Epoch 5 train_loss 0.003976740113519752 train_acc 33.900202079859184
Epoch 6 train_loss 0.004374693392359903 train_acc 33.91279950088012
Epoch 7 train_loss 0.004236579348507641 train_acc 33.90928735003243


KeyboardInterrupt: ignored

In [None]:
from sklearn.metrics import confusion_matrix, f1_score

wandb.init(project="accuracy_2.2.a")

# Define the confusion matrix
def plot_confusion_matrix(conf_matrix):
    conf_matrix = conf_matrix.astype('float') / conf_matrix.sum(axis=1)[:, np.newaxis]
    print(conf_matrix)
    wandb.log({'Confusion Matrix': wandb.Image(conf_matrix, caption='Confusion Matrix')})

correct = 0
total = 0
conf_matrix = np.zeros((10,10))

with torch.no_grad():
    for data in val_loader:
        images, labels = data
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs['out'][0].data, 1) 
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
f1 = f1_score(labels.detach().cpu().numpy(), predicted.detach().cpu().numpy(), average='weighted') 

# Log the Accuracy, F1-Score and Confusion Matrix
wandb.log({'Test Accuracy': accuracy, 'Test F1-Score': f1})

plot_confusion_matrix(conf_matrix)

print('Test Accuracy: {:.5f}%'.format(accuracy))
print('Test F1-Score: {:.2f}'.format(f1))

In [16]:
# declaring variables 
num_classes = 21
pixel_accuracy = np.zeros(num_classes, dtype=np.float32)
f1Score = np.zeros(num_classes, dtype=np.float32)
ious = np.zeros(num_classes, dtype=np.float32)
prec = np.zeros(num_classes, dtype=np.float32)
rec = np.zeros(num_classes, dtype=np.float32)
mlp = np.zeros(num_classes, dtype=np.float32)

In [17]:
import numpy as np
import torch.nn.functional as F
import tqdm

def compute_metrics(model, test_loader, device):

    with torch.no_grad():
        model.eval()
        for inputs, targets in tqdm.tqdm(test_loader):
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)["out"]
            preds = torch.argmax(outputs, dim=1)

            for i in range(num_classes):
                # true positives count
                cond_tp = ((preds == i) & (targets == i))
                true_positives = cond_tp.sum().item()

                # false positives count
                cond_fp = ((preds == i) & (targets != i))
                false_positives = cond_fp.sum().item()

                # false negatives count
                cond_fn = ((preds != i) & (targets == i))
                false_negatives = cond_fn.sum().item()

                # true negatives count
                cond_tn = ((preds != i) & (targets != i))
                true_negatives = cond_tn.sum().item()

                # calculating pixel accuracy
                pixel_accuracy[i] += true_positives + true_negatives
                # calculating precision
                precision = true_positives / (true_positives + false_positives + 1e-8)
                # recall value
                recall = true_positives / (true_positives + false_negatives + 1e-8)

                ap = (true_positives / (true_positives + false_positives + 1e-8) + true_positives / (true_positives + false_negatives + 1e-8)) / 2
                if precision == 0 and recall == 0:
                    f1_score = 0
                    iou = 0
                else:
                    f1_score = 2 * (precision * recall) / (precision + recall)
                    iou = true_positives / (true_positives + false_positives + false_negatives)

                # calcualtion of other parameters
                prec[i] += precision
                ious[i] += iou
                f1Score[i] += f1_score
                mlp[i] += ap
                rec[i] += recall
                

    # final calculation
    mean_precision = prec.mean()
    mean_recall = rec.mean()
    mean_iou = ious.mean()
    pixel_acc = pixel_accuracy.sum() / (num_classes * test_loader.dataset.__len__() * 256 * 256)
    mean_f1_score = f1Score.mean()
    mean_ap = mlp.mean()

    return pixel_acc, mean_f1_score, mean_iou, mean_precision, mean_recall, mean_ap


In [21]:
# compute metrics on test data
pixel_acc, mean_f1_score, mean_iou, mean_precision, mean_recall, mean_ap = compute_metrics(model, test_loader, device) 

# print results
print(f"Pixel Accuracy: {pixel_acc:.4f}")
print(f"Mean F1 Score: {mean_f1_score:.4f}")
print(f"Mean IoU: {mean_iou:.4f}")
print(f"Mean Precision: {mean_precision:.4f}")
print(f"Mean Average Precision: {mean_ap:.4f}")

Pixel Accuracy: 0.692356
Mean F1 Score: 0.8569
Mean IoU: 0.2765
Mean Precision: 0.7836
Mean Average Precision: 0.7025


# Techniques used for Data Augmentation are Random Cropping, Random Erasing and Random Rotation.


### Random Cropping

In [None]:
import cv2

def random_crop(img, sz):
    # height, width = img.shape[:2]
    h = img.shape[0]
    w = img.shape[1]
    ch, cw = sz

    y = np.random.randint(0, h - ch + 1)
    x = np.random.randint(0, w - cw + 1)

    return img[y:y+ch, x:x+cw, :]


## Random Erasing


In [None]:
def random_erasing(img):

    if np.random.uniform() > 0.5:
        return img

    h = img.shape[0]
    w = img.shape[1]
    c = img.shape[2]

    # h, w, c = img.shape
    area = h * w

    while True:
        target_area = np.random.uniform(0.02, 0.4) * area
        aspect_ratio = np.random.uniform(0.3, 3.3)
        er_h = int(round(np.sqrt(target_area * aspect_ratio)))
        er_w = int(round(np.sqrt(target_area / aspect_ratio)))
        if er_h < h and er_w < w:
            break

    x1 = np.random.randint(0, h - er_h)
    y1 = np.random.randint(0, w - er_w)

    if c == 1:
        img[x1:x1+er_h, y1:y1+er_w] = np.random.uniform(0, 1)
    else:
        for i in range(c):
            img[x1:x1+er_h, y1:y1+er_w, i] = np.random.uniform(0, 1)

    return img


## Random Rotation

In [None]:
def random_rotation(img, angle_range=(-15, 15)):

    # Generating a random rotation
    angle = np.random.uniform(angle_range[0], angle_range[1])

    # Image center
    h, w = img.shape[:2]
    cx, cy = w // 2, h // 2

    # Rotation matrix
    M = cv2.getRotationMatrix2D((cx, cy), angle, 1.0)

    # Apply the rotation 
    rotated_img = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)

    return rotated_img


## Training fcn_resnet50 model again

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models

wandb.init(project='train_fcnresnet', name='train_fcnresnet')

# Define hyperparameters
batch_size = 4
learning_rate = 0.001
num_epochs = 10
num_classes = 21

# Define model
model = models.segmentation.fcn_resnet50(pretrained=True)
model.eval()

for param in model.parameters():
    param.requires_grad = False

# change the classification FCNHead and make it learnable
model.classifier[4] = nn.Conv2d(512, 1, kernel_size=1)

# change the aux_classification FCNHead and make it learnable
model.aux_classifier[4] = nn.Conv2d(256, 1, kernel_size=1)

model.classifier[-1].requires_grad = True

model = model.to(device)

const = 84*256

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    model.train()
    # trainerr functions trains the model and returns the results
    vals = trainerr(train_loader) 

    # Compute average training loss and accuracy
    loss = vals[0] / (const*vals[2])
    accuracy = vals[1] / (const*vals[2])

    # Log the training loss and accuracy to wandb
    wandb.log({'epoch': epoch+1, 'train_loss': loss, 'train_acc': accuracy})
    print('Epoch', epoch+1, 'train_loss', loss, 'train_acc', accuracy)


ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Downloading: "https://download.pytorch.org/models/fcn_resnet50_coco-1167a1af.pth" to /root/.cache/torch/hub/checkpoints/fcn_resnet50_coco-1167a1af.pth


  0%|          | 0.00/135M [00:00<?, ?B/s]

Epoch 1 train_loss 0.004421701796897473 train_acc 33.908628346766726
Epoch 2 train_loss 0.004216018842099408 train_acc 33.9059945050491
Epoch 3 train_loss 0.0038886075826544348 train_acc 33.90925622741801
Epoch 4 train_loss 0.0041137081916912165 train_acc 33.91182420499815
Epoch 5 train_loss 0.003976740113519752 train_acc 33.900202079859184
Epoch 6 train_loss 0.004374693392359903 train_acc 33.91279950088012
Epoch 7 train_loss 0.004236579348507641 train_acc 33.90928735003243


KeyboardInterrupt: ignored

In [None]:
from sklearn.metrics import confusion_matrix, f1_score

wandb.init(project="accuracy_2.2.a")

# Define the confusion matrix
def plot_confusion_matrix(conf_matrix):
    conf_matrix = conf_matrix.astype('float') / conf_matrix.sum(axis=1)[:, np.newaxis]
    print(conf_matrix)
    wandb.log({'Confusion Matrix': wandb.Image(conf_matrix, caption='Confusion Matrix')})

correct = 0
total = 0
conf_matrix = np.zeros((10,10))

with torch.no_grad():
    for data in val_loader:
        images, labels = data
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs['out'][0].data, 1) 
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
f1 = f1_score(labels.detach().cpu().numpy(), predicted.detach().cpu().numpy(), average='weighted') 

# Log the Accuracy, F1-Score and Confusion Matrix
wandb.log({'Test Accuracy': accuracy, 'Test F1-Score': f1})

plot_confusion_matrix(conf_matrix)

print('Test Accuracy: {:.5f}%'.format(accuracy))
print('Test F1-Score: {:.2f}'.format(f1))