<a href="https://colab.research.google.com/drive/1ZeyASa_LNl8DqZ6Q9CfRUMnTAtxNjgGZ?usp=drive_link" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import os
import torch
import torchvision
from torchvision.io import read_image
from torchvision.ops.boxes import masks_to_boxes
from torchvision import tv_tensors
from torchvision.transforms.v2 import functional as F
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.transforms import v2 as T
import dlib
import cv2
import numpy as np
from PIL import Image
from torchvision.transforms.functional import to_pil_image, to_tensor


In [None]:
# Mount Google Drive at /gdrive; the dataset archives and model checkpoints
# used by the later cells are read from paths under /gdrive/MyDrive.
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [None]:
# Extract UBIPr dataset
!mkdir -p "/content/data/UBIPr"
!tar -xvf "/gdrive/MyDrive/Deep Learning F23 Final Project/Datasets/UBIPr/single_eye/Images.tar" -C "/content/data/UBIPr/"
!tar -xvf "/gdrive/MyDrive/Deep Learning F23 Final Project/Datasets/UBIPr/single_eye/Masks.tar" -C "/content/data/UBIPr/"

In [None]:
# Implement a UBIPr Dataset Class
class UBIPRDataset(torch.utils.data.Dataset):
    """Iris detection/segmentation samples read from paired image and mask files.

    ``root`` must contain an ``Images`` and a ``Masks`` directory. Files are
    paired purely by their position in the sorted directory listings, so both
    directories must hold the same number of files.

    Args:
        root: Directory containing the ``Images`` and ``Masks`` sub-directories.
        transforms: Callable applied as ``transforms(img, target)``, or ``None``
            to return the sample untouched.

    Raises:
        ValueError: If the two directories contain a different number of files,
            since positional pairing would then attach masks to the wrong images.
    """

    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms
        # Sort both listings so that index i refers to the same sample in each.
        self.imgs = sorted(os.listdir(os.path.join(root, "Images")))
        self.masks = sorted(os.listdir(os.path.join(root, "Masks")))
        if len(self.imgs) != len(self.masks):
            raise ValueError(
                f"Images/Masks mismatch under {root!r}: "
                f"{len(self.imgs)} images vs {len(self.masks)} masks"
            )

    def __getitem__(self, idx):
        """Return ``(img, target)`` for sample ``idx``.

        ``target`` is a dict with the keys ``boxes``, ``masks``, ``labels``,
        ``image_id``, ``area`` and ``iscrowd``.
        """
        img_path = os.path.join(self.root, "Images", self.imgs[idx])
        mask_path = os.path.join(self.root, "Masks", self.masks[idx])
        img = read_image(img_path)
        mask = read_image(mask_path)

        # torch.unique returns the distinct mask values in ascending order.
        # The first value is treated as background; only the next one is kept
        # as the iris id, so there is at most one object per sample.
        obj_ids = torch.unique(mask)[1:2]
        num_objs = len(obj_ids)

        # One binary mask per kept id.
        # NOTE(review): assumes the mask file is single-channel; a multi-channel
        # mask would broadcast into one "instance" per channel -- confirm.
        masks = (mask == obj_ids[:, None, None]).to(dtype=torch.uint8)

        # Tight XYXY bounding box around each binary mask.
        boxes = masks_to_boxes(masks)

        # There is only one foreground class (label 1).
        labels = torch.ones((num_objs,), dtype=torch.int64)

        image_id = idx
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # Suppose all instances are not crowd.
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        # Wrap sample and targets into torchvision tv_tensors so the v2
        # transforms know how to treat each entry.
        img = tv_tensors.Image(img)

        target = {
            "boxes": tv_tensors.BoundingBoxes(boxes, format="XYXY", canvas_size=F.get_size(img)),
            "masks": tv_tensors.Mask(masks),
            "labels": labels,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Return the number of image files found under ``Images``."""
        return len(self.imgs)

In [None]:
"""
The following code blocks are adapted from PyTorch's object detection finetuning tutorial:
https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
"""
# Fetch the helper modules from torchvision's detection reference scripts into
# the working directory; the training cell imports `engine` and `utils` from there.
_REFERENCE_BASE_URL = "https://raw.githubusercontent.com/pytorch/vision/main/references/detection"
for _script in ("engine.py", "utils.py", "coco_utils.py", "coco_eval.py", "transforms.py"):
    # Re-running the cell would make wget save a numbered duplicate
    # ("engine.py.1", ...) that is never imported, so skip files already present.
    if os.path.exists(_script):
        continue
    _status = os.system(f"wget {_REFERENCE_BASE_URL}/{_script}")
    if _status != 0:
        # Do not leave a partial file behind: it would be skipped on the next run.
        if os.path.exists(_script):
            os.remove(_script)
        raise RuntimeError(f"Downloading {_script} failed (wget exit status {_status})")

0

In [None]:
def get_model_instance_segmentation(num_classes):
    """Build a Mask R-CNN whose prediction heads are replaced for fine-tuning.

    Args:
        num_classes: Number of output classes for the new box and mask heads
            (the training cell passes 2: background and iris).

    Returns:
        A ``maskrcnn_resnet50_fpn`` model loaded with its ``"DEFAULT"`` weights,
        with fresh ``FastRCNNPredictor`` and ``MaskRCNNPredictor`` heads sized
        for ``num_classes``.
    """
    # Load an instance segmentation model with its default pre-trained weights.
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")

    # Replace the box classifier head, keeping its input width.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # Replace the mask predictor head, keeping its input channel count.
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask,
        hidden_layer,
        num_classes,
    )
    return model

In [None]:
def get_transform(train):
    """Build the torchvision v2 transform pipeline for a dataset split.

    Args:
        train: When true, random augmentations (horizontal flip, photometric
            distortion, IoU-based crop, Gaussian blur) are placed before the
            dtype conversion. When false, only the conversion steps are used.

    Returns:
        A ``T.Compose`` that ends by converting to float with value scaling
        (``T.ToDtype(torch.float, scale=True)``) and unwrapping to pure tensors.
    """
    transforms = []
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
        transforms.append(T.RandomPhotometricDistort(p=1))
        transforms.append(T.RandomIoUCrop())
        # torchvision documents that RandomIoUCrop must be followed by
        # SanitizeBoundingBoxes so boxes invalidated by the crop are removed
        # (together with their labels and masks) instead of reaching the model.
        transforms.append(T.SanitizeBoundingBoxes())
        transforms.append(T.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5.)))
    transforms.append(T.ToDtype(torch.float, scale=True))
    transforms.append(T.ToPureTensor())
    return T.Compose(transforms)

In [None]:
# Training loop: fine-tune the Mask R-CNN on the UBIPr data, evaluating and
# checkpointing after every epoch.
# `engine` and `utils` are the reference scripts downloaded into the working
# directory by an earlier cell.
from engine import train_one_epoch, evaluate
import utils

# Root directory holding the extracted "Images" and "Masks" folders.
filename = "/content/data/UBIPr"

# train on the GPU or on the CPU, if a GPU is not available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# our dataset has two classes only - background and iris
num_classes = 2
# Two views of the same files: one with training augmentations, one without.
dataset = UBIPRDataset(filename, get_transform(train=True))
dataset_test = UBIPRDataset(filename, get_transform(train=False))

# Split the dataset into disjoint train and test subsets using one shared
# random permutation of the sample indices.
# NOTE(review): the split sizes are hard-coded. With fewer than 8800 samples the
# test subset is empty, and any samples beyond position 11018 are unused.
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:8800]) # We need to change this to increase the training set size
dataset_test = torch.utils.data.Subset(dataset_test, indices[8800:11018]) # We need to change this to increase the test set size

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
    num_workers=4,
    collate_fn=utils.collate_fn
)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    collate_fn=utils.collate_fn
)

# get the model using our helper function
model = get_model_instance_segmentation(num_classes)

# move model to the right device
model.to(device)

# construct an optimizer over the parameters that require gradients
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005
)

# and a learning rate scheduler that multiplies the learning rate by 0.1
# every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1
)

# let's train it for 5 epochs
num_epochs = 5

for epoch in range(num_epochs):
    # train for one epoch, printing every 100 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=100)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)
    # save the weights after every epoch, one file per epoch, in the working directory
    model_filename = f"maskrcnn_epoch{epoch}.pt"
    torch.save(model.state_dict(), model_filename)

print("That's it!")

# End of training loop

In [None]:
"""
Below is the code we used to process the FFHQ and GAN datasets for use with our Residual Attention Network.
We use dlib to extract eye regions from input images, then our fine-tuned MaskRCNN to detect the iris bounding boxes.
The irises are then cropped, concatenated into pairs for each input and resized to 96 x 96.
"""

# MAKE SURE TO ADD THE PREDICTOR PATH (THE DLIB PREDICTION)
# shape_predictor_68_face_landmarks_GTX.dat.bz2
def resize_image(image, target_size=(960, 960)):
    """Return ``image`` resized with ``cv2.resize`` to ``target_size``.

    ``target_size`` is handed to ``cv2.resize`` unchanged as its size argument
    and defaults to ``(960, 960)``.
    """
    resized = cv2.resize(image, target_size)
    return resized


def extract_eye(image, eye_bounds):
    """Crop a rectangle out of ``image``.

    ``eye_bounds`` is indexed as ``(x, y, width, height)``: rows ``y`` to
    ``y + height`` and columns ``x`` to ``x + width`` are returned.
    """
    left, top = eye_bounds[0], eye_bounds[1]
    width, height = eye_bounds[2], eye_bounds[3]
    row_span = slice(top, top + height)
    col_span = slice(left, left + width)
    return image[row_span, col_span]


def crop_eyes(image_folder, predictor_path, output_folder, first_index=1, last_index=5000, margin=40):
    """Crop padded left/right eye regions from numbered face images.

    For every index ``i`` in ``first_index..last_index`` the file
    ``<image_folder>/<i zero-padded to 4 digits>.jpeg`` is processed when it
    exists. dlib detects faces and 68-point landmarks; the bounding rectangle of
    landmarks 36-41 and 42-47 is padded by ``margin`` and written to
    ``left_eye_<i>.jpeg`` and ``right_eye_<i>.jpeg`` in ``output_folder``.

    Args:
        image_folder: Directory containing the numbered ``.jpeg`` inputs.
        predictor_path: Path to the dlib shape-predictor model file.
        output_folder: Directory for the cropped eye images (created if missing).
        first_index: First image number to process (inclusive).
        last_index: Last image number to process (inclusive).
        margin: Pixels of padding added around each eye rectangle; the top-left
            corner is clamped to the image origin.

    Notes:
        Missing files, unreadable files and faces whose padded eye rectangle
        falls outside the image are skipped. When several faces are detected in
        one image they all write to the same two file names, so the crops of
        the last detected face are the ones that remain.
    """
    # Initialize dlib's face detector and load the facial landmark predictor.
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(predictor_path)
    os.makedirs(output_folder, exist_ok=True)

    # Landmark index ranges, see
    # https://ibug.doc.ic.ac.uk/resources/300-W/
    # https://pyimagesearch.com/2017/04/03/facial-landmarks-dlib-opencv-python/
    eye_landmarks = (("left", range(36, 42)), ("right", range(42, 48)))

    for i in range(first_index, last_index + 1):
        image_path = os.path.join(image_folder, f"{str(i).zfill(4)}.jpeg")
        if not os.path.exists(image_path):
            continue  # Skip if the file does not exist

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None instead of raising for undecodable files.
            continue
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        for face in detector(gray):
            landmarks = predictor(gray, face)

            crops = {}
            for side, point_ids in eye_landmarks:
                points = np.array([(landmarks.part(n).x, landmarks.part(n).y) for n in point_ids])
                x, y, w, h = cv2.boundingRect(points)
                # Pad the rectangle so the eye is not over-cropped.
                padded = (max(x - margin, 0), max(y - margin, 0), w + 2 * margin, h + 2 * margin)
                crops[side] = extract_eye(image, padded)

            # Landmarks predicted outside the frame produce an empty slice,
            # which cv2.imwrite cannot encode.
            if crops["left"].size == 0 or crops["right"].size == 0:
                continue

            cv2.imwrite(os.path.join(output_folder, f"left_eye_{i}.jpeg"), crops["left"])
            cv2.imwrite(os.path.join(output_folder, f"right_eye_{i}.jpeg"), crops["right"])

In [None]:
# Extract eye regions from the GAN images and save them on the local VM.
output_directory = "/content/data/Cropped"
# exist_ok avoids the separate existence check and is safe on re-runs.
os.makedirs(output_directory, exist_ok=True)
# Pass output_directory itself so the folder that was created and the folder
# written to cannot drift apart.
crop_eyes(
    "/gdrive/MyDrive/Deep Learning F23 Final Project/Datasets/GAN/GAN/",
    "/gdrive/MyDrive/Deep Learning F23 Final Project/shape_predictor_68_face_landmarks_GTX.dat",
    output_directory,
)

In [None]:
def detect_and_crop_iris(image_path, model, eval_transform, score_threshold=0.5, output_size=(48, 96)):
    """Detect the iris in an eye image and return it cropped and resized.

    Args:
        image_path: Path of the eye image to read.
        model: Detection model; it is switched to eval mode and called on a
            single-image batch.
        eval_transform: Transform applied to the normalized image tensor before
            it is passed to the model.
        score_threshold: The first prediction's score must exceed this value
            for the iris to count as detected.
        output_size: ``(width, height)`` passed to ``PIL.Image.resize``. The
            default 48x96 lets two irises be concatenated into a 96x96 pair.

    Returns:
        A ``uint8`` NumPy array in BGR channel order with shape
        ``(height, width, 3)``, or ``None`` when the image cannot be read, no
        prediction exceeds ``score_threshold``, or the predicted box is empty
        after truncation to integer pixel coordinates.
    """
    from torchvision.io import ImageReadMode

    try:
        # Force three channels so grayscale/RGBA files crop and convert like RGB ones.
        image_uint8 = read_image(image_path, mode=ImageReadMode.RGB)
    except RuntimeError:
        return None

    # Run on whichever device the model lives on instead of relying on a global.
    model_device = next(model.parameters()).device
    image = (image_uint8.float() / 255.0).to(model_device)  # Normalize to [0, 1]

    # Perform iris detection using the model.
    model.eval()
    with torch.no_grad():
        x = eval_transform(image)
        x = x[:3, ...]  # Use only RGB channels
        x = x.unsqueeze(0).to(model_device)  # Add batch dimension
        pred = model(x)[0]

    # Only the first prediction is considered.
    if len(pred["labels"]) == 0 or not pred["scores"][0] > score_threshold:
        return None

    # Box corners truncated to whole pixels.
    x_min, y_min, x_max, y_max = pred["boxes"][0].long().tolist()
    if x_max <= x_min or y_max <= y_min:
        # Truncation collapsed the box; there is nothing to crop.
        return None

    # Crop from the original uint8 pixels, avoiding a float round-trip.
    cropped_image_pil = to_pil_image(image_uint8[:, y_min:y_max, x_min:x_max])
    resized_iris_pil = cropped_image_pil.resize(output_size, Image.LANCZOS)

    # PIL gives HWC RGB; OpenCV writers expect BGR.
    resized_iris_rgb = np.array(resized_iris_pil)
    return cv2.cvtColor(resized_iris_rgb, cv2.COLOR_RGB2BGR)

In [None]:
# Extract eye regions from previous session for iris detection/segmentation
!tar -xvf '/gdrive/MyDrive/Deep Learning F23 Final Project/Datasets/Pre-MaskRCNN.tar' -C '/content'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Pre-MaskRCNN/right_eye_5473.jpeg
Pre-MaskRCNN/right_eye_5474.jpeg
Pre-MaskRCNN/right_eye_5475.jpeg
Pre-MaskRCNN/right_eye_5476.jpeg
Pre-MaskRCNN/right_eye_5477.jpeg
Pre-MaskRCNN/right_eye_5478.jpeg
Pre-MaskRCNN/right_eye_5479.jpeg
Pre-MaskRCNN/right_eye_548.jpeg
Pre-MaskRCNN/right_eye_5480.jpeg
Pre-MaskRCNN/right_eye_5481.jpeg
Pre-MaskRCNN/right_eye_5482.jpeg
Pre-MaskRCNN/right_eye_5483.jpeg
Pre-MaskRCNN/right_eye_5484.jpeg
Pre-MaskRCNN/right_eye_5485.jpeg
Pre-MaskRCNN/right_eye_5486.jpeg
Pre-MaskRCNN/right_eye_5487.jpeg
Pre-MaskRCNN/right_eye_5488.jpeg
Pre-MaskRCNN/right_eye_5489.jpeg
Pre-MaskRCNN/right_eye_549.jpeg
Pre-MaskRCNN/right_eye_5490.jpeg
Pre-MaskRCNN/right_eye_5491.jpeg
Pre-MaskRCNN/right_eye_5492.jpeg
Pre-MaskRCNN/right_eye_5493.jpeg
Pre-MaskRCNN/right_eye_5494.jpeg
Pre-MaskRCNN/right_eye_5495.jpeg
Pre-MaskRCNN/right_eye_5496.jpeg
Pre-MaskRCNN/right_eye_5497.jpeg
Pre-MaskRCNN/right_eye_5498.jpeg
Pre-MaskRCNN/

In [None]:
# Load our MaskRCNN from a previous session
model_checkpoint = '/gdrive/MyDrive/Deep Learning F23 Final Project/Model Checkpoints/Mask R-CNN/maskrcnn_113023_1210am.pt'
num_classes = 2
model = get_model_instance_segmentation(num_classes)
# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime.
model.load_state_dict(torch.load(model_checkpoint, map_location=device))
model.to(device)
eval_transform = get_transform(train=False)

input_folder = "/content/Pre-MaskRCNN"
cropped_folder = "/content/dataset"
iris_not_detected = []
iris_size = (96,96)

# cv2.imwrite does not create directories and fails without raising when the
# target folder is missing, so make sure it exists first.
os.makedirs(cropped_folder, exist_ok=True)

# Detect iris pairs, resize and save
for i in range(1, 10001):
    right_eye_path = os.path.join(input_folder, f"right_eye_{i}.jpeg")
    left_eye_path = os.path.join(input_folder, f"left_eye_{i}.jpeg")

    # Indices for which no eye crop exists at all are not detection failures.
    if not (os.path.exists(right_eye_path) or os.path.exists(left_eye_path)):
        continue

    resized_right_iris = detect_and_crop_iris(right_eye_path, model, eval_transform)
    resized_left_iris = detect_and_crop_iris(left_eye_path, model, eval_transform)

    if resized_right_iris is not None and resized_left_iris is not None:
        # paste irises next to each other (left | right)
        combined_iris = np.concatenate((resized_left_iris, resized_right_iris), axis=1)
        irises_path = os.path.join(cropped_folder, f"{i}".zfill(4) + '.jpg')
        cv2.imwrite(irises_path, combined_iris)
        continue

    # Record every eye that failed, including the case where both did.
    if resized_right_iris is None:
        iris_not_detected.append(right_eye_path)
    if resized_left_iris is None:
        iris_not_detected.append(left_eye_path)

# Print the list of images where the iris was not detected
print("Images with iris not detected:")
for image_path in iris_not_detected:
    print(image_path)


Images with iris not detected:
/content/Pre-MaskRCNN/right_eye_14.jpeg
/content/Pre-MaskRCNN/right_eye_20.jpeg
/content/Pre-MaskRCNN/right_eye_34.jpeg
/content/Pre-MaskRCNN/right_eye_43.jpeg
/content/Pre-MaskRCNN/right_eye_81.jpeg
/content/Pre-MaskRCNN/right_eye_82.jpeg
/content/Pre-MaskRCNN/right_eye_97.jpeg
/content/Pre-MaskRCNN/right_eye_140.jpeg
/content/Pre-MaskRCNN/right_eye_152.jpeg
/content/Pre-MaskRCNN/right_eye_181.jpeg
/content/Pre-MaskRCNN/right_eye_249.jpeg
/content/Pre-MaskRCNN/right_eye_264.jpeg
/content/Pre-MaskRCNN/right_eye_275.jpeg
/content/Pre-MaskRCNN/right_eye_296.jpeg
/content/Pre-MaskRCNN/right_eye_299.jpeg
/content/Pre-MaskRCNN/right_eye_322.jpeg
/content/Pre-MaskRCNN/right_eye_334.jpeg
/content/Pre-MaskRCNN/right_eye_386.jpeg
/content/Pre-MaskRCNN/right_eye_448.jpeg
/content/Pre-MaskRCNN/right_eye_456.jpeg
/content/Pre-MaskRCNN/left_eye_481.jpeg
/content/Pre-MaskRCNN/left_eye_563.jpeg
/content/Pre-MaskRCNN/right_eye_590.jpeg
/content/Pre-MaskRCNN/left_eye_591.

In [None]:
# Zip final dataset
!zip -r '/gdrive/MyDrive/Deep Learning F23 Final Project/Datasets/dataset.zip' '/content/dataset'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: content/dataset/6849.jpg (deflated 6%)
  adding: content/dataset/7689.jpg (deflated 4%)
  adding: content/dataset/7545.jpg (deflated 5%)
  adding: content/dataset/6409.jpg (deflated 5%)
  adding: content/dataset/8593.jpg (deflated 4%)
  adding: content/dataset/4904.jpg (deflated 5%)
  adding: content/dataset/3084.jpg (deflated 5%)
  adding: content/dataset/6269.jpg (deflated 5%)
  adding: content/dataset/6617.jpg (deflated 5%)
  adding: content/dataset/2631.jpg (deflated 4%)
  adding: content/dataset/5675.jpg (deflated 5%)
  adding: content/dataset/4817.jpg (deflated 4%)
  adding: content/dataset/1729.jpg (deflated 4%)
  adding: content/dataset/5420.jpg (deflated 4%)
  adding: content/dataset/1055.jpg (deflated 5%)
  adding: content/dataset/9644.jpg (deflated 6%)
  adding: content/dataset/8909.jpg (deflated 4%)
  adding: content/dataset/9033.jpg (deflated 4%)
  adding: content/dataset/0281.jpg (deflated 4%)
  ad