# Object Detection

## Loading Libraries

In [3]:
# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# Data Visualization
import seaborn as sns
import matplotlib
import matplotlib_inline
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import matplotlib.patches as patches

# Dataset's Iteration Performance
from tqdm import tqdm

# Time
import time

# OS
import re
import sys
import json
import string
import unicodedata
from glob import glob
from io import BytesIO
from imageio import imread
from zipfile import ZipFile
import requests, zipfile, io
from urllib.request import urlopen



# SciPy
from scipy.signal import convolve

# PyTorch
import torch
import torchvision
import torch.nn as nn
from torch.utils.data import *
from torchvision.ops import nms
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator


# IDLMAM Libraries
from idlmam import moveTo, run_epoch, set_seed, View
from idlmam import train_simple_network, set_seed, Flatten, weight_reset, train_network
from idlmam import LanguageNameDataset, pad_and_pack, EmbeddingPackable, LastTimeStep, LambdaLayer


# Scikit-Learn
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

#  IPython Display
from IPython.display import Latex
from IPython.display import display_pdf
from IPython.display import set_matplotlib_formats

  from .autonotebook import tqdm as notebook_tqdm


### Visualization Set-Up

In [4]:
%matplotlib inline

matplotlib_inline.backend_inline.set_matplotlib_formats('png', 'pdf')

### Setting Seeds

In [5]:
torch.backends.cudnn.deterministic=True

set_seed(42)

In [6]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### Nuclei Detection: Retrieving & Loading Data

In [8]:
data_url_zip = "https://github.com/kamalkraj/DATA-SCIENCE-BOWL-2018/blob/master/data/stage1_train.zip?raw=true"

In [None]:
import os

# Download and unpack the dataset only when it is not already on disk, so
# re-running this cell does not hit the network again.
if not os.path.isdir('./data/stage1_train'):
    # Fetch the whole archive BEFORE creating the target directory: if the
    # download fails, no empty directory is left behind to make the next run
    # wrongly skip the download.
    with urlopen(data_url_zip) as resp:
        archive_bytes = BytesIO(resp.read())
    os.makedirs("./data/stage1_train", exist_ok=True)
    # The archive handle gets its own name so the imported `zipfile` module is
    # not shadowed, and the context manager closes it even on failure.
    with ZipFile(archive_bytes) as archive:
        archive.extractall(path='./data/stage1_train')

# Get all the sample directories. `glob` returns entries in arbitrary,
# filesystem-dependent order; sorting keeps dataset indices (and therefore the
# seeded train/test split) stable across machines.
paths = sorted(glob("./data/stage1_train/*"))

##### Dataset class for the nuclei images:

In [None]:
class DSB2018(Dataset):
    """Dataset over sample directories that each hold one image and its masks.

    Every item is an ``(image, mask)`` pair of float tensors resized to
    256x256: the image keeps its first three channels (channels-first, scaled
    by 1/255) and all mask files of the sample are merged into a single
    one-channel 0/1 mask.
    """

    def __init__(self, paths):
        # One directory per sample; files are only read inside __getitem__.
        self.paths = paths

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        sample_dir = self.paths[idx]
        image_file = glob(sample_dir + "/images/*")[0]
        mask_files = glob(sample_dir + "/masks/*")

        # Keep the first three channels, move them to the front, scale by 1/255.
        pixels = np.moveaxis(imread(image_file)[:, :, 0:3], -1, 0) / 255.0

        # A pixel is foreground when any of the mask files marks it.
        mask_arrays = [imread(mask_file) / 255.0 for mask_file in mask_files]
        merged = np.zeros(mask_arrays[0].shape)
        for mask_array in mask_arrays:
            merged = np.logical_or(merged, mask_array)
        merged = merged.astype(np.float32)

        # F.interpolate expects a batch dimension, so add one and drop it again.
        image = F.interpolate(torch.tensor(pixels).unsqueeze(0), (256, 256))
        mask = F.interpolate(torch.tensor(merged).unsqueeze(0).unsqueeze(0), (256, 256))
        return image.type(torch.FloatTensor)[0], mask.type(torch.FloatTensor)[0]

In [None]:
# Create the Dataset class object
dsb_data = DSB2018(paths)

# First sample: input image on the left, merged mask on the right
first_image, first_mask = dsb_data[0]
fig, (ax_image, ax_mask) = plt.subplots(1, 2, figsize=(16, 10))

# imshow wants channels-last, the dataset yields channels-first
ax_image.imshow(first_image.permute(1, 2, 0).numpy())

# The mask has a single channel; index it away to get a 2-D array
ax_mask.imshow(first_mask.numpy()[0, :], cmap='gray')
plt.show()

In [None]:
# Second sample: input image on the left, merged mask on the right
second_image, second_mask = dsb_data[1]
fig, (ax_image, ax_mask) = plt.subplots(1, 2, figsize=(16, 10))

# imshow wants channels-last, the dataset yields channels-first
ax_image.imshow(second_image.permute(1, 2, 0).numpy())

# The mask has a single channel; index it away to get a 2-D array
ax_mask.imshow(second_mask.numpy()[0, :], cmap='gray')
plt.show()

### Train-Test Splitting

In [None]:
# 500 randomly selected samples for training, the remainder for testing
n_train = 500
train_split, test_split = torch.utils.data.random_split(
    dsb_data, [n_train, len(dsb_data) - n_train]
)

# Only the training batches are shuffled
train_seg_loader = DataLoader(train_split, batch_size=16, shuffle=True)
test_seg_loader = DataLoader(test_split, batch_size=16)

In [None]:
# Channels - (RGB)
# NOTE: `C` and `n_filters` are reassigned later in the Faster R-CNN section
# (C = 1 there). Re-run this cell before re-running any segmentation cell,
# otherwise the models are built for single-channel input.
C = 3 

# Filters
n_filters = 32 

# Loss Function
# Takes raw logits, so the segmentation models end without a sigmoid; the
# inspection cells apply torch.sigmoid themselves before thresholding.
loss_func = nn.BCEWithLogitsLoss()

### 1st Image Segmentation Network

In [None]:
# CNN Hidden Layers Helper Function:
def cnnLayer(in_filters, out_filters, kernel_size=3):
    """Build one hidden block: Conv2d -> BatchNorm2d -> LeakyReLU.

    Args:
        in_filters: number of input channels of the convolution.
        out_filters: number of output channels (also the BatchNorm width).
        kernel_size: convolution kernel size; the padding is
            ``kernel_size // 2``, which keeps height and width unchanged for
            odd kernel sizes.

    Returns:
        An ``nn.Sequential`` holding the three layers in that order.
    """
    conv = nn.Conv2d(in_filters, out_filters, kernel_size, padding=kernel_size // 2)
    norm = nn.BatchNorm2d(out_filters)
    activation = nn.LeakyReLU()
    return nn.Sequential(conv, norm, activation)

# Plain fully-convolutional network: every layer keeps the spatial size, so the
# output holds one logit per input pixel.
segmentation_model = nn.Sequential(
    # Input block: C image channels -> n_filters feature maps
    cnnLayer(C, n_filters),
    # Five more hidden blocks at constant width
    *(cnnLayer(n_filters, n_filters) for _ in range(5)),
    # Prediction layer: one output channel
    nn.Conv2d(n_filters, 1, kernel_size=3, padding=1),
)

##### Segmentation Model Training:

In [None]:
# Segmentation Model Training
# The returned results are plotted later via their 'epoch' and 'val loss' columns.
seg_results = train_network(
    segmentation_model,
    loss_func,
    train_seg_loader,
    epochs=10,
    device=device,
    val_loader=test_seg_loader,
)

##### Checking the model's outcome as follows:

In [None]:
index = 6

# Fetch the test sample once and reuse it for inference and plotting
sample_image, sample_mask = test_split[index]

with torch.no_grad():
    logits = segmentation_model(sample_image.unsqueeze(0).to(device))[0].cpu()
    # The model outputs logits; a probability of at least 0.5 counts as foreground
    pred = torch.sigmoid(logits) >= 0.5

# Plotting: Input, Ground Truth & Prediction
fig, (ax_input, ax_truth, ax_pred) = plt.subplots(1, 3, figsize=(16, 10))
ax_input.imshow(sample_image.permute(1, 2, 0).numpy(), cmap='gray')
ax_truth.imshow(sample_mask.numpy()[0, :], cmap='gray')
ax_pred.imshow(pred.numpy()[0, :], cmap='gray')

# Hand-placed call-outs on the prediction panel. The coordinates are specific
# to this sample and this trained model.
arrow_style = dict(arrowstyle="->", linewidth=2.5, color='tomato')
for text, target_xy, text_offset in [
    ('Error: Hole', (130, 230), (-60, 60)),
    ('Error: Hole', (210, 75), (-160, -60)),
    ('Error: Phantom object', (247, 15), (-240, -50)),
]:
    ax_pred.annotate(text, color="red", fontsize=20, xy=target_xy,
                     xycoords='data', xytext=text_offset,
                     textcoords='offset points',
                     arrowprops=dict(arrow_style))

## Transposed Convolutions for Expanding Image Size

### Implementing a Network with Transposed Convolutions

In [None]:
segmentation_model2 = nn.Sequential(
    # Full-resolution feature extraction
    cnnLayer(C, n_filters),
    cnnLayer(n_filters, n_filters),
    # Halve height and width
    nn.MaxPool2d(kernel_size=2),
    # Work at half resolution with twice the filters
    cnnLayer(n_filters, 2 * n_filters),
    *(cnnLayer(2 * n_filters, 2 * n_filters) for _ in range(2)),
    # Transposed convolution with stride 2 doubles height and width again
    nn.ConvTranspose2d(2 * n_filters, n_filters, kernel_size=3, stride=2, padding=1, output_padding=1),
    nn.BatchNorm2d(n_filters),
    nn.LeakyReLU(),
    cnnLayer(n_filters, n_filters),
    nn.Conv2d(n_filters, 1, kernel_size=3, padding=1),  # Shape is now (B, 1, W, H)
)

##### Transposed Model Training:

In [None]:
# Same loss, loaders and epoch count as the first model, so the validation
# curves are directly comparable.
seg_results2 = train_network(
    segmentation_model2,
    loss_func,
    train_seg_loader,
    epochs=10,
    device=device,
    val_loader=test_seg_loader,
)

##### Contrasting Outcomes as follows:

In [None]:
index = 6

# Same test sample as used for the first model
sample_image, sample_mask = test_split[index]

with torch.no_grad():
    pred = segmentation_model2(sample_image.unsqueeze(0).to(device))[0].cpu()
    # Logits -> probabilities -> boolean foreground mask
    pred = torch.sigmoid(pred) >= 0.5

# Plotting: Input, Ground Truth & Prediction
fig, (ax_input, ax_truth, ax_pred) = plt.subplots(1, 3, figsize=(16, 10))

# Original image
ax_input.imshow(sample_image.permute(1, 2, 0).numpy(), cmap='gray')

# Ground truth
ax_truth.imshow(sample_mask.numpy()[0, :], cmap='gray')

# Prediction
ax_pred.imshow(pred.numpy()[0, :], cmap='gray')

In [None]:
# Drop both model names; the training results (seg_results, seg_results2) stay
# available for the comparison plots below.
del segmentation_model, segmentation_model2

In [None]:
# Validation loss per epoch: plain CNN vs. CNN with a transposed convolution
for history, curve_label in [
    (seg_results, 'CNN'),
    (seg_results2, 'CNN w/ transposed-conv'),
]:
    sns.lineplot(x='epoch', y='val loss', data=history, label=curve_label)

plt.grid(True)
plt.show()

## U-Net: Looking at Fine & Coarse Details

### Implementing U-Net

In [None]:
class UNetBlock2d(nn.Module):
    """One resolution level of a U-Net, optionally wrapping a lower level.

    The input goes through ``layers`` hidden blocks. If ``sub_network`` is
    given, the result is max-pooled by 2, passed through ``sub_network``,
    upsampled by a stride-2 transposed convolution and concatenated with the
    full-scale features along the channel axis before the output blocks.

    Args:
        in_channels: channels of the input tensor.
        mid_channels: channels used by the hidden blocks of this level.
        out_channels: if not None, a final 1x1 convolution maps the result to
            this many channels; otherwise the block outputs ``mid_channels``.
        layers: number of hidden blocks on the input side and on the output side.
        sub_network: module run at half resolution. It receives
            ``mid_channels`` channels and its output is fed to a transposed
            convolution that expects ``mid_channels`` channels.
        filter_size: kernel size of the hidden blocks and of the transposed
            convolution.
    """

    def __init__(self, in_channels, mid_channels, out_channels=None, layers=1, sub_network=None, filter_size=3):
        super().__init__()

        # With a sub-network, forward() concatenates two mid_channels-wide
        # tensors, so the first output block sees twice the channels.
        concat_factor = 1 if sub_network is None else 2

        # Layers are created in the same interleaved order as before so that
        # seeded weight initialisation is unaffected.
        encoder = [cnnLayer(in_channels, mid_channels, filter_size)]
        decoder = [cnnLayer(mid_channels * concat_factor, mid_channels, filter_size)]
        for _ in range(1, layers):
            encoder.append(cnnLayer(mid_channels, mid_channels, filter_size))
            decoder.append(cnnLayer(mid_channels, mid_channels, filter_size))
        if out_channels is not None:
            # 1x1 convolution only changes the channel count
            decoder.append(nn.Conv2d(mid_channels, out_channels, 1, padding=0))

        self.in_model = nn.Sequential(*encoder)
        if sub_network is None:
            self.bottleneck = None
        else:
            # Down by 2, process at lower resolution, back up by 2
            self.bottleneck = nn.Sequential(
                nn.MaxPool2d(2),
                sub_network,
                nn.ConvTranspose2d(mid_channels, mid_channels, filter_size,
                                   padding=filter_size // 2, output_padding=1, stride=2),
            )
        self.out_model = nn.Sequential(*decoder)

    def forward(self, x):
        skip = self.in_model(x)
        if self.bottleneck is None:
            return self.out_model(skip)
        # Combine full-scale features with the upsampled lower-level result
        upsampled = self.bottleneck(skip)
        return self.out_model(torch.cat([skip, upsampled], dim=1))

##### U-Net Model as follows:

In [None]:
# Three nested resolution levels; each inner level doubles the hidden width
# and maps back to the width its parent expects via `out_channels`.
unet_model = nn.Sequential(
    UNetBlock2d(
        in_channels=3, mid_channels=32, layers=2,
        sub_network=UNetBlock2d(
            in_channels=32, mid_channels=64, out_channels=32, layers=2,
            sub_network=UNetBlock2d(in_channels=64, mid_channels=128, out_channels=64, layers=2),
        ),
    ),
    nn.Conv2d(32, 1, kernel_size=3, padding=1),  # Shape is now (B, 1, W, H)
)

##### U-Net Model Training

In [None]:
# Same loss, loaders and epoch count as the two earlier models
unet_results = train_network(
    unet_model,
    loss_func,
    train_seg_loader,
    epochs=10,
    device=device,
    val_loader=test_seg_loader,
)

In [None]:
# Validation loss per epoch for all three segmentation models
for history, curve_label in [
    (seg_results, 'CNN'),
    (seg_results2, 'CNN w/ transposed-conv'),
    (unet_results, 'UNet'),
]:
    sns.lineplot(x='epoch', y='val loss', data=history, label=curve_label)

plt.grid(True)
plt.show()

## Object Detection with Bounding Boxes

### Faster R-CNN

- Backbone

- Region Proposal Network (RPN)

- Region of Interest (ROI) Pooling

### Using Faster R-CNN in PyTorch

In [None]:
# Setting Special Seed
set_seed(42)

In [None]:
class Class2Detect(Dataset):
    """Turn a classification dataset into a synthetic detection dataset.

    Each item is a blank square canvas onto which between 1 and ``toSample``
    randomly chosen images of ``dataset`` are pasted at random positions
    (overlapping images are summed). The target lists one bounding box and
    label per pasted image.

    Args:
        dataset: indexable dataset yielding ``(image, label)`` pairs, where
            ``image`` is a ``(C, H, W)`` tensor.
        toSample: maximum number of images pasted onto one canvas.
        canvas_size: side length of the square canvas in pixels; it must be
            larger than the largest image dimension.
    """

    def __init__(self, dataset, toSample=3, canvas_size=100):
        self.dataset = dataset
        self.toSample = toSample
        self.canvas_size = canvas_size

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(canvas, target)``.

        ``idx`` is not used: every access draws fresh random images and
        positions from NumPy's global random state.

        Returns:
            canvas: float32 tensor of shape ``(C, canvas_size, canvas_size)``.
            target: dict with ``"boxes"`` (float32, rows of
                ``[xmin, ymin, xmax, ymax]``) and ``"labels"`` (int64).
        """
        boxes = []
        labels = []

        final_size = self.canvas_size
        img_p = torch.zeros((final_size, final_size), dtype=torch.float32)
        for _ in range(np.random.randint(1, self.toSample + 1)):

            img, label = self.dataset[np.random.randint(0, len(self.dataset))]
            _, img_h, img_w = img.shape

            # Four values are still drawn (only two are used) so the random
            # stream, and therefore seeded results, stay as they were.
            offsets = np.random.randint(0, final_size - np.max(img.shape), size=(4))
            left, top = int(offsets[0]), int(offsets[2])

            # F.pad takes (left, right, top, bottom) for the last two
            # dimensions, so the right padding depends on the image WIDTH and
            # the bottom padding on the image HEIGHT. These two were swapped
            # before, which only worked for square images.
            right = final_size - img_w - left
            bottom = final_size - img_h - top

            with torch.no_grad():
                img_p = img_p + F.pad(img, (left, right, top, bottom))

            boxes.append([left, top, left + img_w, top + img_h])
            labels.append(label)

        target = {}
        target["boxes"] = torch.as_tensor(boxes, dtype=torch.float32)
        target["labels"] = torch.as_tensor(labels, dtype=torch.int64)

        return img_p, target

##### Implementing an R-CNN Collate Function

In [None]:
# Train Set: MNIST digits pasted onto detection canvases
train_data = Class2Detect(
    torchvision.datasets.MNIST(root="./", train=True, transform=transforms.ToTensor(), download=True)
)

# Test Set: same construction on the MNIST test split
test_data = Class2Detect(
    torchvision.datasets.MNIST(root="./", train=False, transform=transforms.ToTensor(), download=True)
)

In [None]:
def collate_fn(batch):
    """Split a batch of ``(image, target)`` pairs into two parallel lists.

    Nothing is stacked: the images and targets are returned as plain lists,
    which is the input format the detection model is called with in the
    training loop.

    Args:
        batch: iterable of 2-item ``(image, target)`` pairs.

    Returns:
        Tuple ``(images, targets)`` of two lists in batch order.
    """
    pairs = list(batch)
    imgs = [img for img, _ in pairs]
    labels = [label for _, label in pairs]
    return imgs, labels

In [None]:
# The custom collate function keeps images and targets as lists instead of
# stacking them into tensors.
train_loader = DataLoader(
    train_data,
    batch_size=128,
    shuffle=True,
    collate_fn=collate_fn,
)

##### Examining The MNIST Detection Data

In [None]:
x, y = train_data[0] 

imshow(x.numpy()[0,:])

In [None]:
print(y) 
print("Boxes: ", y['boxes']) 
print("Labels: ", y['labels']) 

##### Defining a Faster R-CNN Model

In [None]:
# Channels
# NOTE: overrides the C = 3 used by the segmentation section above; the
# detection canvases are built from single-channel MNIST images.
C = 1

# Classes
# NOTE(review): not referenced by the cells shown here; the model construction
# below hard-codes num_classes=10. Confirm whether it should use this value.
classes = 10

# Backbone Filters
n_filters = 32

In [None]:
backbone = nn.Sequential(
    # Stage 1: full resolution, n_filters wide
    cnnLayer(C, n_filters),
    *(cnnLayer(n_filters, n_filters) for _ in range(2)),
    nn.MaxPool2d(kernel_size=(2, 2)),
    # Stage 2: half resolution, 2 * n_filters wide
    cnnLayer(n_filters, 2 * n_filters),
    *(cnnLayer(2 * n_filters, 2 * n_filters) for _ in range(2)),
    nn.MaxPool2d(kernel_size=(2, 2)),
    # Stage 3: quarter resolution, 4 * n_filters wide
    cnnLayer(2 * n_filters, 4 * n_filters),
    cnnLayer(4 * n_filters, 4 * n_filters),
)

# Backbone Output R-CNN Channels: must equal the width of the last layer above
backbone.out_channels = 4 * n_filters

In [None]:
# Anchor Generator: one feature map, one anchor size (32 px), one aspect ratio.
# The nested one-element tuples are spelled out in full: `((32),)` is simply
# `(32,)`, because parentheses around a single value do not create a tuple.
anchor_generator = AnchorGenerator(sizes=((32,),), aspect_ratios=((1.0,),))

# RoI pooling over the backbone's single feature map (named '0')
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0'],
    output_size=7,
    sampling_ratio=2,
)

##### Building the Model

In [None]:
# Assemble the detector from the custom backbone, anchor generator and RoI
# pooler defined above. min_size == max_size == 100 matches the default
# canvas_size of Class2Detect.
# NOTE(review): torchvision's FasterRCNN counts the background as one of
# `num_classes` and reserves label 0 for it, while the MNIST targets use
# labels 0-9. Digit 0 is then presumably treated as background; confirm, and
# if so use num_classes=11 with labels shifted by +1 in the dataset.
model = FasterRCNN(backbone, 
num_classes=10, 
image_mean = [0.5], 
image_std = [0.229], 
min_size=100, 
max_size=100, 
rpn_anchor_generator=anchor_generator, 
box_roi_pool=roi_pooler)

##### Implementing a Faster R-CNN Training Loop

In [None]:
# Training mode: the detector returns a dict of losses instead of predictions
model = model.train()
model.to(device)

optimizer = torch.optim.AdamW(model.parameters())

for epoch in tqdm(range(1), desc="Epoch", disable=False):
    running_loss = 0.0
    for inputs, labels in tqdm(train_loader, desc="Train Batch", leave=False, disable=False):
        inputs, labels = moveTo(inputs, device), moveTo(labels, device)

        optimizer.zero_grad()
        losses = model(inputs, labels)
        # Optimise the sum of all partial losses returned by the model
        loss = sum(losses.values())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

In [None]:
# Switch to inference mode and keep the model on the compute device
model = model.eval().to(device)

In [None]:
set_seed(161)

In [None]:
x, y = test_data[0]

# Ideal Ground Truth Outcome
print(y) 

In [None]:
with torch.no_grad():
    pred = model([x.to(device)])

##### Examining The Results

In [None]:
print(pred)

In [None]:
def plotDetection(ax, abs_pos, label=None):
    """Draw one bounding box, with an optional text label, on ``ax``.

    Args:
        ax: matplotlib axes to draw on.
        abs_pos: ``(x1, y1, x2, y2)`` box corners in data coordinates.
        label: text placed at the ``(x1, y1)`` corner; nothing is written when
            it is None.

    Returns:
        None.
    """
    x1, y1, x2, y2 = abs_pos
    rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=1, edgecolor='r', facecolor='none')
    ax.add_patch(rect)
    if label is not None:
        # Write on `ax` itself rather than on pyplot's current axes, so the
        # label always lands on the same axes as its rectangle.
        ax.text(x1 + 0.5, y1, label, color='black', bbox=dict(facecolor='white', edgecolor='white', pad=1.0))

def showPreds(img, pred):
    """Show an image with every predicted box and its label drawn on top.

    No filtering is applied: all boxes in ``pred`` are drawn, overlapping ones
    included. A later cell redefines ``showPreds`` with non-maximum
    suppression and a score threshold; once that cell has run, it replaces
    this definition.

    Args:
        img: image tensor; only its first channel is displayed.
        pred: dict with ``'boxes'`` (rows of ``x1, y1, x2, y2``) and
            ``'labels'`` tensors. Other keys are ignored.
    """
    fig, ax = plt.subplots(1)
    ax.imshow(img.cpu().numpy()[0, :])

    # Move to the CPU once; the tensors are converted to NumPy / Python values below.
    boxes = pred['boxes'].cpu()
    labels = pred['labels'].cpu()

    for i in range(labels.shape[0]):
        plotDetection(ax, boxes[i].numpy(), label=str(labels[i].item()))

    plt.show()

In [None]:
showPreds(x, pred[0])

The `showPreds` output shows several spurious, overlapping boxes for the same object.

### Suppressing Overlapping Boxes

- Non-Maximum Suppression (NMS)

- Intersection over Union (IoU)

In [None]:
print(pred[0]['boxes'])

In [None]:
print(pred[0]['scores'])

In [None]:
print(nms(pred[0]['boxes'], pred[0]['scores'], 0.5))

In [None]:
def showPreds(img, pred, iou_max_overlap=0.5, min_score=0.05, label_names=None):
    """Show an image with its detections after non-maximum suppression.

    Args:
        img: image tensor in channels-first layout; a single-channel image is
            shown as a 2-D array, anything else is converted to channels-last.
        pred: dict with ``'boxes'``, ``'labels'`` and ``'scores'`` tensors.
        iou_max_overlap: IoU threshold passed to ``nms``.
        min_score: a box kept by ``nms`` is drawn only when its score is
            strictly greater than this value.
        label_names: optional sequence indexed by label id; when None, the
            numeric label is shown.
    """
    fig, ax = plt.subplots(1)

    pixels = img.cpu().numpy()
    single_channel = pixels.shape[0] == 1
    ax.imshow(pixels[0, :] if single_channel else np.moveaxis(pixels, 0, 2))

    boxes, labels, scores = (pred[key].cpu() for key in ('boxes', 'labels', 'scores'))

    # Indices of the boxes that survive suppression
    for keep_idx in nms(boxes, scores, iou_max_overlap).cpu().numpy():
        if scores[keep_idx].item() > min_score:
            class_id = labels[keep_idx].item()
            text = str(class_id) if label_names is None else label_names[class_id]
            plotDetection(ax, boxes[keep_idx].cpu().numpy(), label=text)

    plt.show()

## Using The Pretrained Faster R-CNN

In [None]:
# Faster R-CNN with a ResNet-50 FPN backbone and pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in favor
# of the `weights=` argument; confirm against the installed version.
rcnn = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

In [None]:
rcnn = rcnn.eval()

##### COCO Instance Category Names

In [None]:
# Class names indexed by the integer label the detector predicts; passed to
# showPreds as `label_names`. Index 0 is the background entry.
# NOTE(review): the 'N/A' entries presumably fill unused label ids so that the
# list index keeps matching the label id; verify against the model's category
# list before editing.
NAME = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

In [None]:
urls = [
    "https://hips.hearstapps.com/hmg-prod.s3.amazonaws.com/images/10best-cars-group-cropped-1542126037.jpg",
    "https://miro.medium.com/max/5686/1*ZqJFvYiS5GmLajfUfyzFQA.jpeg",
    "https://www.denverpost.com/wp-content/uploads/2018/03/virginia_umbc_001.jpg?w=910"
]

In [None]:
# Download the first example image. A timeout and a status check make a dead
# link fail here with a clear error instead of a decoding error further down.
response = requests.get(urls[0], timeout=30)
response.raise_for_status()

# Decode with imageio's `imread`, which this notebook already imports; the
# `Image` name used here before was never imported and raised a NameError.
img = imread(BytesIO(response.content))

In [None]:
# Scale 8-bit pixel values to [0, 1]. Dividing by 255 (not 256) maps the
# maximum value to exactly 1.0 and matches the scaling used by the DSB2018
# dataset class earlier in this notebook.
img = np.asarray(img)/255.0

# Height-width-channels -> channels-first, as the detector expects
img = torch.tensor(img, dtype=torch.float32).permute((2,0,1))

with torch.no_grad():
    # The model takes a list of images and returns one prediction dict per image
    pred = rcnn([img])

In [None]:
showPreds(img, pred[0], iou_max_overlap=0.15, min_score=0.15, label_names=NAME)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=2fe63438-cd94-4397-bd67-8cd5a8e03d33' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>