## INFERENCE

In [1]:
import numpy as np
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision
from torch.utils.data import DataLoader, Dataset
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN

# Very few imports. This is a pure torch solution!
import cv2
import time

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

# Run on the GPU when torch can see one, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Checkpoint loaded by get_model(). The commented line is an alternative
# checkpoint path kept for reference.
#WEIGHTS_FILE = "../working/fasterrcnn_resnet50_fpn-e7.bin"
WEIGHTS_FILE = "../input/rcnnmodel3/fasterrcnn_resnet50_fpn-e99.bin"
from PIL import Image
import PIL

# Very few imports. This is a pure torch solution!
import cv2
import time

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision
from torch.utils.data import DataLoader, Dataset
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN

In [2]:
def get_model():
    """Build the Faster R-CNN detector and load the trained weights into it.

    The network is a torchvision ``fasterrcnn_resnet50_fpn`` whose box
    predictor is replaced by a 2-class head (starfish + background). No
    pretrained weights are downloaded; every parameter comes from
    ``WEIGHTS_FILE``.

    Returns:
        The model, in eval mode, moved to the module-level ``device``.
    """
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)
    num_classes = 2  # 1 class (starfish) + background

    # The new head must take the same number of input features as the stock one
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # map_location remaps the stored tensors onto `device`, so a checkpoint
    # saved from a GPU run still loads when only the CPU is available.
    state_dict = torch.load(WEIGHTS_FILE, map_location=device)
    model.load_state_dict(state_dict)
    model.eval()

    model = model.to(device)
    return model

# Build the detector once at notebook level; the evaluation cell passes this instance to predict()
model = get_model()

In [3]:
# Folder holding the frames; ReefDataset.get_image joins it with each row's `image_path`
BASE_DIR = "../input/tensorflow-great-barrier-reef/train_images/"

In [4]:
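# Uncomment this if we want to submit the scores.
# NOTE: `predict` returns a `(boxes, scores)` tuple with boxes as
# [x_min, y_min, x_max, y_max]; convert the boxes to [x_min, y_min, w, h] and pass
# them through `format_prediction_string` before assigning to `annotations`.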

#import greatbarrierreef
# import PIL.Image
# env = greatbarrierreef.make_env()
# iter_test = env.iter_test() 

# for (pixel_array, df_pred) in iter_test:  # iterate through all test set images
#     df_pred['annotations'] = predict(model, pixel_array)
#     env.predict(df_pred)

In [5]:
import ast

df = pd.read_csv("../input/csvfile/train-test.csv")

# Turn annotations from strings into lists of dictionaries.
# ast.literal_eval only accepts Python literals, so (unlike eval) a malformed
# or tampered CSV cell raises an error instead of executing arbitrary code.
df['annotations'] = df['annotations'].apply(ast.literal_eval)

# Create the image path for the row: video_<video_id>/<video_frame>.jpg
df['image_path'] = "video_" + df['video_id'].astype(str) + "/" + df['video_frame'].astype(str) + ".jpg"

df.head()

In [6]:
# Split on the `is_train` flag (assumed to be a boolean column, since it is used as a mask and negated with `~`)
df_train, df_val = df[df['is_train']], df[~df['is_train']]

In [7]:
# The model doesn't support images with no annotations. During training it raises
#   ValueError: No ground-truth boxes available for one of the images during training
# so frames whose annotation list is empty are dropped from both splits for now.
# https://discuss.pytorch.org/t/fasterrcnn-images-with-no-objects-present-cause-an-error/117974/3
df_train = df_train.loc[df_train['annotations'].str.len() > 0].reset_index(drop=True)
df_val = df_val.loc[df_val['annotations'].str.len() > 0].reset_index(drop=True)

In [8]:
# Number of rows left in each split after dropping frames without annotations
df_train.shape[0], df_val.shape[0]

In [9]:
class ReefDataset:
    """Map-style dataset yielding ``(image, target)`` pairs for a detection model.

    Each row of ``df`` must provide ``image_path`` (relative to ``BASE_DIR``)
    and ``annotations`` (a list of dicts with ``x``, ``y``, ``width`` and
    ``height`` keys). ``transforms`` is an optional albumentations pipeline
    called with ``image``, ``bboxes`` and ``labels``.
    """

    def __init__(self, df, transforms=None):
        self.df = df
        self.transforms = transforms

    def can_augment(self, boxes, width=1280, height=720):
        """Check whether every bounding box lies inside the image.

        For example, image_id 1-490 has a bounding box that is partially
        outside of the image, which breaks albumentations. Augmentation is
        only applied when this check passes.

        Args:
            boxes: array of shape (n_boxes, 4) as [x_min, y_min, x_max, y_max].
            width: image width in pixels (defaults to the 1280 used so far).
            height: image height in pixels (defaults to the 720 used so far).

        Returns:
            True when no box crosses an image border.
        """
        box_outside_image = (
            (boxes[:, 0] < 0).any() or (boxes[:, 1] < 0).any()
            or (boxes[:, 2] > width).any() or (boxes[:, 3] > height).any()
        )
        return not box_outside_image

    def get_boxes(self, row):
        """Return the row's boxes as a 2D float array of shape (n_boxes, 4).

        Columns are [x_min, y_min, x_max, y_max]; a row without annotations
        yields an array of shape (0, 4).
        """
        boxes = pd.DataFrame(row['annotations'], columns=['x', 'y', 'width', 'height']).astype(float).values

        # Change from [x_min, y_min, w, h] to [x_min, y_min, x_max, y_max]
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        return boxes

    def get_image(self, row):
        """Load the row's image as a float32 RGB array scaled to [0, 1].

        Raises:
            FileNotFoundError: if the file is missing or cannot be decoded.
        """
        path = f'{BASE_DIR}/{row["image_path"]}'
        image = cv2.imread(path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the path rather than later inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {path}")

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        return image

    def __getitem__(self, i):
        """Return ``(image, target)`` for row ``i``.

        ``image`` is a CHW tensor. ``target`` is a dict with ``boxes``,
        ``area``, ``image_id``, ``labels`` and ``iscrowd`` tensors.
        """
        row = self.df.iloc[i]
        image = self.get_image(row)
        boxes = self.get_boxes(row)

        n_boxes = boxes.shape[0]

        # Box area, computed from the corner coordinates
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {
            'boxes': torch.as_tensor(boxes, dtype=torch.float32),
            'area': torch.as_tensor(area, dtype=torch.float32),

            'image_id': torch.tensor([i]),

            # There is only one class
            'labels': torch.ones((n_boxes,), dtype=torch.int64),

            # Suppose all instances are not crowd
            'iscrowd': torch.zeros((n_boxes,), dtype=torch.int64)
        }

        # Validate the boxes against the size of the image that was actually loaded
        image_height, image_width = image.shape[:2]

        if self.transforms and self.can_augment(boxes, width=image_width, height=image_height):
            sample = {
                'image': image,
                'bboxes': target['boxes'],
                'labels': target['labels']
            }
            sample = self.transforms(**sample)
            image = sample['image']

            if n_boxes > 0:
                # The pipeline returns one 4-tuple per box: stack the four
                # coordinate columns, then transpose back to (n_boxes, 4).
                target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1, 0)
        else:
            # No augmentation: only convert the HWC array to a CHW tensor
            image = ToTensorV2(p=1.0)(image=image)['image']

        return image, target

    def __len__(self):
        return len(self.df)

In [10]:
def get_train_transform():
    """Return the training pipeline: a random flip, then conversion to a tensor.

    Boxes are expected in pascal_voc format ([x_min, y_min, x_max, y_max]),
    with their classes passed through the ``labels`` field.
    """
    return A.Compose([
        # The probability must be passed by keyword: in the albumentations
        # releases where the first positional argument is `always_apply`,
        # `A.Flip(0.5)` is truthy there and flips every single image.
        A.Flip(p=0.5),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})


def get_valid_transform():
    """Return the validation pipeline: tensor conversion only, no augmentation.

    Boxes use the pascal_voc format and their classes travel in ``labels``.
    """
    steps = [ToTensorV2(p=1.0)]
    box_settings = A.BboxParams(format='pascal_voc', label_fields=['labels'])
    return A.Compose(steps, bbox_params=box_settings)

In [11]:
# Wrap each split in a dataset with its own transform pipeline
ds_train = ReefDataset(df=df_train, transforms=get_train_transform())
ds_val = ReefDataset(df=df_val, transforms=get_valid_transform())

In [12]:
# Look for a visually interesting sample: frames with more than 12 annotations
df_train.loc[df_train['annotations'].str.len() > 12].head()

In [13]:
# Fetch one training sample by a hard-coded index and display the image tensor
image, targets = ds_train[2200]
image

In [14]:
# Draw the sample's ground-truth boxes on top of its image
boxes = targets['boxes'].cpu().numpy().astype(np.int32)

# Work on a copy: the array returned by .numpy() shares memory with the tensor,
# so drawing on it directly would also paint the boxes into `image`.
img = image.permute(1, 2, 0).cpu().numpy().copy()
fig, ax = plt.subplots(1, 1, figsize=(16, 8))

for x_min, y_min, x_max, y_max in boxes:
    # The image holds floats in [0, 1], so the colour uses the same scale
    # (220 is out of range and only shows as red because imshow clips it).
    # Coordinates are cast to plain ints, which every OpenCV build accepts.
    cv2.rectangle(img,
                  (int(x_min), int(y_min)),
                  (int(x_max), int(y_max)),
                  (1.0, 0.0, 0.0), 3)

ax.set_axis_off()
ax.imshow(img);

In [15]:
def collate_fn(batch):
    """Regroup a list of samples into one tuple per field.

    A batch ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; nothing is stacked.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Batches of 8 samples, kept in dataset order; collate_fn leaves samples unstacked
print("ds_val: ", ds_val)
dl_train = DataLoader(
    ds_train,
    batch_size=8,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn,
)
dl_val = DataLoader(
    ds_val,
    batch_size=8,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn,
)
print("dl_val: ", dl_val)

In [16]:
# Minimum score a detection needs to be kept by predict(); it filters with
# `scores >= detection_threshold`, so 0 keeps every non-negative score.
detection_threshold = 0
import torch
from collections import Counter
#from iou import intersection_over_union

def format_prediction_string(boxes, scores):
    """Serialise detections as "score v0 v1 v2 v3" groups joined by spaces.

    Scores are written with two decimals and the four box values as they
    are. Boxes and scores are paired in order; extra items on either side
    are ignored.
    """
    # Format as specified in the evaluation page
    template = "{0:.2f} {1} {2} {3} {4}"
    return " ".join(
        template.format(score, box[0], box[1], box[2], box[3])
        for score, box in zip(scores, boxes)
    )


def predict(model, pixel_array, threshold=None):
    """Run the detector on a single image and return the detections it keeps.

    Args:
        model: detection model whose output for a one-image batch is a list
            holding a dict with ``boxes`` and ``scores`` tensors.
        pixel_array: HWC numpy image. Float images are passed through
            unchanged (the datasets in this notebook already scale them to
            [0, 1]); uint8 images are scaled to [0, 1] here.
        threshold: minimum score to keep a detection. Defaults to the
            module-level ``detection_threshold``.

    Returns:
        ``(boxes, scores)``: ``boxes`` is an int32 array of shape (n, 4) left
        in the model's [x_min, y_min, x_max, y_max] format (convert to
        [x_min, y_min, w, h] before building a submission string), and
        ``scores`` is the matching array of confidences.
    """
    if threshold is None:
        threshold = detection_threshold

    # Raw uint8 frames must be brought to the same [0, 1] float scale as the
    # dataset images, otherwise the model sees values up to 255.
    if pixel_array.dtype == np.uint8:
        pixel_array = pixel_array.astype(np.float32) / 255.

    # HWC array -> CHW tensor with a leading batch dimension
    tensor_img = ToTensorV2(p=1.0)(image=pixel_array)['image'].unsqueeze(0)

    # Get predictions
    with torch.no_grad():
        outputs = model(tensor_img.to(device))[0]

    # Move predictions to cpu and numpy
    boxes = outputs['boxes'].detach().cpu().numpy()
    scores = outputs['scores'].detach().cpu().numpy()

    # Filter predictions with low score (one mask shared by both arrays)
    keep = scores >= threshold
    boxes = boxes[keep].astype(np.int32)
    scores = scores[keep]

    return boxes, scores

def intersection_over_union(gt_box, pred_box, box_format="xywh"):
    """Compute the intersection-over-union of two boxes.

    Args:
        gt_box: ground-truth box, four numbers.
        pred_box: predicted box, four numbers.
        box_format: ``"xywh"`` (default) reads boxes as [x_min, y_min, w, h];
            ``"corners"`` reads them as [x_min, y_min, x_max, y_max]. The
            boxes produced by ``ReefDataset`` and ``predict`` in this notebook
            are in corner format, so pass ``"corners"`` for those.

    Returns:
        ``(iou, intersection, union)``. Boxes that do not overlap give an
        intersection and IoU of 0; an empty union also gives an IoU of 0.

    Raises:
        ValueError: if ``box_format`` is not one of the two supported values.
    """
    if box_format == "xywh":
        gt_x1, gt_y1 = gt_box[0], gt_box[1]
        gt_x2, gt_y2 = gt_box[0] + gt_box[2], gt_box[1] + gt_box[3]
        pr_x1, pr_y1 = pred_box[0], pred_box[1]
        pr_x2, pr_y2 = pred_box[0] + pred_box[2], pred_box[1] + pred_box[3]
    elif box_format == "corners":
        gt_x1, gt_y1, gt_x2, gt_y2 = gt_box[0], gt_box[1], gt_box[2], gt_box[3]
        pr_x1, pr_y1, pr_x2, pr_y2 = pred_box[0], pred_box[1], pred_box[2], pred_box[3]
    else:
        raise ValueError(f"Unsupported box_format: {box_format!r}")

    # Clamp at 0: for disjoint boxes both differences can be negative, and
    # their product would otherwise look like a positive overlap.
    inter_box_w = max(0, min(gt_x2, pr_x2) - max(gt_x1, pr_x1))
    inter_box_h = max(0, min(gt_y2, pr_y2) - max(gt_y1, pr_y1))

    intersection = inter_box_w * inter_box_h
    gt_area = (gt_x2 - gt_x1) * (gt_y2 - gt_y1)
    pred_area = (pr_x2 - pr_x1) * (pr_y2 - pr_y1)
    union = gt_area + pred_area - intersection

    # Two zero-area boxes have no union; report no overlap instead of dividing by 0
    iou = intersection / union if union > 0 else 0.0

    return iou, intersection, union

def mean_average_precision(pred_boxes, true_boxes, iou_threshold=0.5, box_format="corners", num_classes=2):
    average_precisions = []
    epsilon = 1e-6
    
    for c in range(num_classes):
        detections = []
        ground_truths = []
        
        for detection in pred_boxes:
            if detection[1] == c:
                detections.append(detection)
        
        for true_box in true_boxes:
            if true_box[1] == c:
                ground_truths.append(true_box)
                
        amount_bboxes = Counter([gt[0] for gt in ground_truths])
        
        for key, val in amount_bboxes.items():
            amount_bboxes[key] = torch.zeros(val)
            
        detections.sort(key=lambda x: x[2], reverse=True)
        TP = torch.zeros((len(detections)))
        FP = torch.zeros((len(detections)))
        total_true_bboxes = len(ground_truths)
        
        for detection_idx, detection in enumerate(detections):
            ground_truth_img = [
                bbox for bbox in ground_truths if bbox[0] == detection[0]
            ]
            
            num_gts = len(ground_truth_img)
            best_iou = 0
            
            for idx, gt in enumerate(ground_truth_img):
                iou = intersection_over_union(
                torch.tensor(detection[3:]),
                    torch.tensor(gt[3:]),
                    box_format=box_format,
                )
                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            if best_iou > iou_threshold:
                if amount_bboxes[detection[0]][best_gt_idx] == 0:
                    TP[detection_idx] = 1
                    amount_bboxes[detection[0]][best_gt_idx] = 1
                else:
                    FP[detection_idx] = 1

            else:
                FP[detection_idx] = 1
            
        TP_cumsum = torch.cumsum(TP, dim=0)
        FP_cumsum = torch.cumsum(FP, dim=0)
        recalls = TP_cumsum / (total_true_bboxes + epsilon)
        precisions = torch.divide(TP_cumsum,(TP_cumsum + FP_cumsum + epsilon))
        precisions = torch.cat((torch.tensor([0]), recalls))
        average_precisions.append(torch.trapz(precisions, recalls))
        
    return sum(average_precisions) / len(average_precisions)
    

In [17]:
import sys
sys.path.append('../input/meanaverageprecision/')
import mAP
from collections import Counter

# Evaluation settings. `iou_thresholds`, `form`, `scoreArray` and `TN` are not
# used by the matching below; they are kept in case code outside this cell
# still reads them.
iou_thresholds = 0.5
form='pascal_voc'
scoreArray = []
iou_threshold = 0.5
VERBOSE = False  # set to True to print the boxes of every image

# One entry (a 1) per counted true positive / false positive / false negative
TP = []
FP = []
TN = []
FN = []


def _corner_iou(box_a, box_b):
    """IoU of two [x_min, y_min, x_max, y_max] boxes (0 when they are disjoint)."""
    ax1, ay1, ax2, ay2 = (float(v) for v in box_a[:4])
    bx1, by1, bx2, by2 = (float(v) for v in box_b[:4])
    inter_w = max(0.0, min(ax2, bx2) - max(ax1, bx1))
    inter_h = max(0.0, min(ay2, by2) - max(ay1, by1))
    intersection = inter_w * inter_h
    union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - intersection
    return intersection / union if union > 0 else 0.0


def _count_matches(gt_boxes, pred_boxes, pred_scores, threshold):
    """Greedily match predictions to ground truths; return (tp, fp, fn) for one image.

    Predictions are visited from the highest score down. Each one claims the
    still-unmatched ground truth it overlaps most: it is a true positive when
    that IoU is above `threshold`, a false positive otherwise. Ground truths
    left unclaimed are false negatives.
    """
    claimed = [False] * len(gt_boxes)
    tp = fp = 0
    by_score = np.argsort(-np.asarray(pred_scores, dtype=float), kind="stable")

    for pred_idx in by_score:
        best_iou, best_gt_idx = 0.0, -1
        for gt_idx, gt_box in enumerate(gt_boxes):
            if claimed[gt_idx]:
                continue
            iou = _corner_iou(gt_box, pred_boxes[pred_idx])
            if iou > best_iou:
                best_iou, best_gt_idx = iou, gt_idx

        if best_gt_idx >= 0 and best_iou > threshold:
            claimed[best_gt_idx] = True
            tp += 1
        else:
            fp += 1

    fn = claimed.count(False)
    return tp, fp, fn


for i in range(len(ds_val)):
    image, targets = ds_val[i]
    # Both the dataset boxes and the predictions are [x_min, y_min, x_max, y_max]
    boxes = targets['boxes'].cpu().numpy().astype(np.int32)
    sample = image.permute(1,2,0).cpu().numpy()
    prediction_boxes, prediction_scores = predict(model, sample)

    if VERBOSE:
        print("i: ", i)
        print("actual: ", boxes)
        print("prediction: ", prediction_boxes)
        print("prediction scores: ", prediction_scores)
        print("\n")

    n_tp, n_fp, n_fn = _count_matches(boxes, prediction_boxes, prediction_scores, iou_threshold)
    TP.extend([1] * n_tp)
    FP.extend([1] * n_fp)
    FN.extend([1] * n_fn)

TP_sum = sum(TP)
FP_sum = sum(FP)
FN_sum = sum(FN)
print("TP: ", TP_sum)
print("FP: ", FP_sum)
print("FN: ", FN_sum)

# Guard the ratios: an empty validation set or a model without detections
# would otherwise divide by zero.
recalls = TP_sum / (TP_sum + FN_sum) if (TP_sum + FN_sum) else 0.0
precisions = TP_sum / (TP_sum + FP_sum) if (TP_sum + FP_sum) else 0.0

print("precision: ", precisions)
print("recall: ", recalls)
print("\n")