# **Bounding Boxes Prediction - Visualization**

In this notebook we show how to visualize the bounding boxes predicted by the neural network model. The code operates on Swimming Pools, but the procedure is analogous for Water Tanks - **the user needs to make the necessary changes**.

## ***Preamble***

In [None]:
# Mount Google Drive at /content/drive; the dataset archive, model checkpoint
# and result paths used later in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%%shell

# Install pycocotools from the cocoapi sources.
# Stop at the first failing command instead of silently carrying on.
set -e

# Skip the clone when the checkout is already present so the cell can be re-run.
[ -d cocoapi ] || git clone https://github.com/cocodataset/cocoapi.git
cd cocoapi/PythonAPI
python setup.py build_ext install

## ***Dataset***

In [None]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
import xml.etree.ElementTree as ET

# Dataloader

class PoolsDataset(torch.utils.data.Dataset):
    """Object-detection dataset backed by an image folder and XML box annotations.

    ``root`` must contain an ``images`` directory and an ``annotations``
    directory. Each annotation is an XML file whose ``object/bndbox`` elements
    carry ``xmin``, ``ymin``, ``xmax`` and ``ymax`` values. Images and
    annotations are paired by position after sorting both directory listings,
    so the two listings have to line up one-to-one.
    """

    def __init__(self, root, transforms=None):
        """
        Args:
            root: dataset directory holding ``images/`` and ``annotations/``.
            transforms: optional callable taking and returning ``(img, target)``.
        """
        self.root = root
        self.transforms = transforms
        self.imgs = sorted(os.listdir(os.path.join(root, "images")))
        self.anno = sorted(os.listdir(os.path.join(root, "annotations")))

    def _load_target(self, idx):
        """Parse the ``idx``-th annotation file into a detection target dict.

        Returns:
            dict with ``boxes`` (N, 4) float32 as [xmin, ymin, xmax, ymax],
            ``labels`` (N,) int64 all ones, ``image_id`` (1,), ``area`` (N,)
            and ``iscrowd`` (N,) int64 all zeros.
        """
        tree = ET.parse(os.path.join(self.root, "annotations", self.anno[idx]))
        boxes = []
        for obj in tree.findall('object'):
            bbox = obj.find('bndbox')
            boxes.append([float(bbox.find(tag).text)
                          for tag in ('xmin', 'ymin', 'xmax', 'ymax')])

        # reshape(-1, 4) keeps the tensor two-dimensional when the image has no
        # annotated object; otherwise the column indexing below raises IndexError.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        n = len(boxes)
        return {
            "boxes": boxes,
            "labels": torch.ones((n,), dtype=torch.int64),  # single foreground class
            "image_id": torch.tensor([idx]),
            "area": area,
            "iscrowd": torch.zeros((n,), dtype=torch.int64),
        }

    def __getitem__(self, idx):
        """Return ``(img, target)`` for sample ``idx``, after optional transforms."""
        img_path = os.path.join(self.root, "images", self.imgs[idx])
        # Context manager releases the file handle as soon as the pixels are copied.
        with Image.open(img_path) as raw:
            img = raw.convert("RGB")

        target = self._load_target(idx)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of samples, taken from the image listing."""
        return len(self.imgs)

In [None]:
from zipfile import ZipFile 
  
file = '/content/drive/My Drive/Pool Detection/datasets/pool.zip'

# Unpack the dataset archive into the current working directory.
# The handle is deliberately not called `zip`: that would rebind the builtin
# zip() for every cell executed afterwards.
with ZipFile(file, 'r') as archive:
  print('Extracting all the files now...')
  archive.extractall()
  print('Done!')

Extracting all the files now...
Done!


In [None]:
%%shell

# Stop at the first failing command so a failed clone/checkout is not followed
# by copies from a missing or wrong tree.
set -e

# Skip the clone when the checkout is already present so the cell can be re-run.
[ -d vision ] || git clone https://github.com/pytorch/vision.git
cd vision
git checkout v0.3.0

# Copy the detection reference helpers next to the notebook so they can be
# imported as plain modules (engine, utils, transforms, ...).
cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../

In [None]:
from engine import train_one_epoch, evaluate
import utils
import transforms as T


def get_transform(train):
    """Build the (image, target) transform pipeline.

    Tensor conversion is always applied; when ``train`` is truthy a random
    horizontal flip with probability 0.5 is appended after it.
    """
    steps = [T.ToTensor()]
    if train:
        steps += [T.RandomHorizontalFlip(0.5)]
    return T.Compose(steps)

In [None]:
# Test split, built with the non-training transform (tensor conversion only).
dataset_test = PoolsDataset(root='pool/test', transforms=get_transform(train=False))

# One image per batch, served in dataset order; batches are assembled by the
# collate function from the copied detection reference utilities.
data_loader_test = torch.utils.data.DataLoader(
    dataset_test,
    collate_fn=utils.collate_fn,
    num_workers=4,
    shuffle=False,
    batch_size=1,
)

## ***Model Initialization***

In [None]:
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# Feature extractor: the convolutional part of MobileNetV2. FasterRCNN reads
# `out_channels` from the backbone, so it is set by hand to the 1280 channels
# of the final feature map.
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280

# One feature map -> one tuple of sizes and one tuple of aspect ratios.
# Written as an explicit tuple of tuples: the previous `((1.0),)` was really the
# flat tuple `(1.0,)` and only worked because AnchorGenerator normalises it.
anchor_generator = AnchorGenerator(sizes=((8, 16, 32, 64, 128, 256, 512),),
                                   aspect_ratios=((1.0,),))

# RoI pooling over the single feature map produced by the backbone.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=["0"],
                                                output_size=7,
                                                sampling_ratio=2)

# Two classes: background and the object class.
model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler,
                   min_size=1280)

In [None]:
# Load the trained network weights from Drive.
path = '/content/drive/My Drive/Pool Detection/faster_pool/ft_pool_campinas15.pt'

# map_location='cpu' lets a checkpoint that was saved from a GPU load on a
# CPU-only runtime as well; the model is moved to the chosen device later on.
model.load_state_dict(torch.load(path, map_location='cpu'))

## ***Testing***

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# move model to the right device
model.to(device)

# pick one image from the test set; the second item is the ground-truth target
# (box coordinates), unused here. It is not bound to `_` because assigning `_`
# disables IPython's last-output variable.
img, _target = dataset_test[0]

# put the model in evaluation mode
model.eval()
with torch.no_grad():
    # Named `sample_prediction` so it does not collide with the `prediction()`
    # helper defined in the Visualization section: re-running this cell would
    # otherwise replace that function with a list.
    sample_prediction = model([img.to(device)])

sample_prediction

## ***Visualization***

In [None]:
def prediction(img, threshold):
  """Run the detector on one image and return the boxes scoring above `threshold`.

  Args:
    img: image accepted by `torchvision.transforms.ToTensor` (the cells in this
      notebook pass arrays read with `skimage.io.imread`).
    threshold: detections are kept only when their score is strictly greater.

  Returns:
    A list with one `[(xmin, ymin), (xmax, ymax)]` entry per kept detection.
    Coordinates are plain Python ints (truncated pixel positions) so they can
    be handed straight to OpenCV drawing calls; the list is empty when nothing
    passes the threshold.
  """
  image = torchvision.transforms.ToTensor()(img)

  # Inference needs evaluation mode; set it here so the function does not rely
  # on another cell having done so.
  model.eval()
  with torch.no_grad():
    pred = model([image.to(device)])

  detections = pred[0]
  # Boolean mask instead of list.index(): index() returns the first *equal*
  # score, which silently dropped detections that tie with an earlier one.
  keep = detections['scores'] > threshold
  kept_boxes = detections['boxes'][keep].cpu().tolist()

  return [[(int(x1), int(y1)), (int(x2), int(y2))]
          for x1, y1, x2, y2 in kept_boxes]

In [None]:
import cv2
import matplotlib.pyplot as plt
from skimage import io

def geration_bb(img, threshold, rect_th, path):
  """Draw the predicted boxes on an image, display it and save it to `path`.

  Args:
    img: image array as returned by `skimage.io.imread`.
    threshold: score threshold forwarded to `prediction`.
    rect_th: rectangle line thickness in pixels.
    path: destination file for the annotated image.
  """
  boxes = prediction(img, threshold)

  # Draw on a copy so the caller's array is left untouched.
  annotated = img.copy()
  for top_left, bottom_right in boxes:
    # OpenCV expects integer pixel coordinates.
    pt1 = tuple(int(v) for v in top_left)
    pt2 = tuple(int(v) for v in bottom_right)
    cv2.rectangle(annotated, pt1, pt2, color=(255, 0, 0), thickness=rect_th)

  fig = plt.figure(figsize=(10,10))
  plt.imshow(annotated)
  plt.xticks([])
  plt.yticks([])
  plt.show()
  # This function is called once per tile in a loop; close the figure so open
  # figures do not pile up.
  plt.close(fig)

  # save the annotated image
  io.imsave(path, annotated)

### ***Individual Patch***

In [None]:
# Input: one image/patch of a specific region inside the crops240 folder.
path = '/content/drive/My Drive/Dataset/crops240/controle/4080-1200_controle.jpg'
# Output: where the annotated patch is written.
path_save = '/content/drive/My Drive/Pool Detection/results/prediction_patch.png'

image = io.imread(path)

# Show and save the predictions for score threshold 0.8 with a 1-pixel outline
# (change the threshold to obtain other results).
geration_bb(image, 0.8, 1, path_save)

### ***Entire Region***

In [None]:
import numpy as np
from skimage import io

tag = 'alto'
row = col = 240

# The per-region output folder may not exist yet; create it up front so the
# first save does not fail.
os.makedirs(f'/content/drive/My Drive/Pool Detection/results/{tag}', exist_ok=True)

# Walk the 6000x6000 region tile by tile (240-pixel steps) and save one
# annotated image per tile.
for r in range(0,6000,row):
  for c in range(0,6000,col):
    image = io.imread(f'/content/drive/My Drive/Dataset/crops240/{tag}/{r}-{c}_{tag}.jpg')
    path_save = f'/content/drive/My Drive/Pool Detection/results/{tag}/result_{r}-{c}.png'
    geration_bb(image, threshold = 0.8, rect_th = 1, path = path_save)

In [None]:
import numpy as np
from skimage import io

row = col = 240

# 8-bit canvas: the tiles are 8-bit images, and a float64 canvas holding 0-255
# values gets rescaled or rejected when written as PNG (and uses 8x the memory).
pasted = np.zeros((6000,6000,3), dtype=np.uint8)

# Paste every annotated tile back at its (r, c) offset. `tag` comes from the
# per-tile prediction cell above.
for r in range(0,6000,row):
  for c in range(0,6000,col):
    image = io.imread(f'/content/drive/My Drive/Pool Detection/results/{tag}/result_{r}-{c}.png')

    if image.shape != (row, col, 3):
      # Report the offending tile's offsets (not the constant tile size) and
      # leave that tile black instead of failing on the assignment.
      print('Error: r = {}, c = {}'.format(r, c))
      continue

    pasted[r:(r+row),c:(c+col),:] = image

io.imsave(f'/content/drive/My Drive/Pool Detection/results/{tag}_th80.png', pasted)