# Imports

In [0]:
import argparse
import copy
import glob
from math import ceil, floor
import numpy as np
import os
import random
import time
from tqdm import tqdm
import zipfile

import cv2
import matplotlib.pyplot as plt
import PIL.Image as Image
from PIL import ImageOps
from skimage.io import imsave

!pip install imgaug
import imgaug as ia
import imgaug.augmenters as iaa

import torchvision.transforms as transforms
from torchvision import datasets
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.models import resnet50, resnet152

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils import data


# Colab-only: make Google Drive visible inside the notebook runtime.
from google.colab import drive
# Mount Drive under /content/drive/ (force_remount=True is passed so an
# existing mount is replaced rather than reused).
drive.mount('/content/drive/', force_remount=True)
# Work from the Drive root so that the relative paths used by the cells below
# ('./bird_dataset', './bird_data', 'experiment/...') resolve inside Drive.
os.chdir('/content/drive/My Drive/')

# Bird detection

In [0]:
# Pretrained Mask R-CNN used only as a bird detector (its masks are ignored).
# Run on the GPU when one is available, otherwise fall back to the CPU.
seg_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
seg_model = maskrcnn_resnet50_fpn(pretrained=True).to(seg_device)
seg_model.eval()

# Index in this list == integer label predicted by the detector.
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
bird_label = COCO_INSTANCE_CATEGORY_NAMES.index('bird')


directory = './bird_dataset'  # images to crop
name_dataset = './bird_data'  # folder that is to contain cropped images

all_images = glob.glob(directory+'/*/*/*')

bird_not_found = 0
bird_not_found_list = []

threshold = 0.5  # threshold for the detection
transform = transforms.Compose([transforms.ToTensor()])

for image_path in all_images:

  # Force three channels so grayscale / RGBA files follow the same code path.
  img = transform(Image.open(image_path).convert('RGB')).to(seg_device)
  with torch.no_grad():  # inference only: do not keep an autograd graph per image
    pred = seg_model([img])[0]

  scores = pred['scores'].cpu().numpy()
  labels = pred['labels'].cpu().numpy()
  pred_boxes = pred['boxes'].cpu().numpy()  # one (x_min, y_min, x_max, y_max) row per detection

  # Keep only confident detections that are actually labelled 'bird'.
  bird_idx = np.flatnonzero((scores > threshold) & (labels == bird_label))

  crop = None
  if bird_idx.size > 0:
    # Crop around the most confident *bird* box, not around whatever object
    # happens to be the top-scoring detection in the image.
    best = bird_idx[np.argmax(scores[bird_idx])]
    x_min, y_min, x_max, y_max = pred_boxes[best]
    # Round inwards so the crop never extends past the predicted box.
    row_min, row_max = ceil(y_min), floor(y_max)
    col_min, col_max = ceil(x_min), floor(x_max)
    crop = img.cpu().permute(1, 2, 0).numpy()[row_min:row_max, col_min:col_max, :]

  # No confident bird, or a degenerate box that rounds to an empty crop.
  if crop is None or crop.size == 0:
    bird_not_found += 1
    bird_not_found_list.append(image_path)
    continue

  # Save the crop under the same relative path inside the new dataset folder.
  new_name = image_path.replace(directory, name_dataset)
  os.makedirs(os.path.dirname(new_name), exist_ok=True)
  imsave(new_name, (crop*255).astype(np.uint8))

print('Out of {}, {} have not been found.'.format(len(all_images), bird_not_found))

# Zero padding to make images square

In [0]:
def make_image_square(img, fill=0):
  """
  Pad the shorter side of a PIL image so that it becomes square.

  The padding is split as evenly as possible between the two sides of the
  shorter dimension (the extra pixel, if any, goes to the bottom / right),
  so the original content stays centred.

  Parameters
  ----------
  img: PIL.Image.Image
    Image to pad; only its ``size`` (width, height) is inspected here.
  fill: optional
    Value forwarded to ``ImageOps.expand`` for the added border.
    Defaults to 0, which is what the function used before this
    parameter existed.

  Returns
  -------
  new_img: PIL.Image.Image
    Image of size (max(width, height), max(width, height)). When the input
    is already square the very same object is returned, not a copy.
  """

  width, height = img.size

  if width == height:
    return img

  missing = abs(width - height)
  before = missing // 2
  after = missing - before

  # ImageOps.expand takes the border as (left, top, right, bottom).
  if width > height:
    border = (0, before, 0, after)  # landscape: pad top and bottom
  else:
    border = (before, 0, after, 0)  # portrait: pad left and right

  return ImageOps.expand(img, border, fill)

# Data

In [0]:
class ImgAugTransform:
  """
  Callable adapter that lets an imgaug pipeline be used as one step of a
  torchvision ``transforms.Compose``.

  The pipeline chains a Gaussian blur and a coarse dropout, each wrapped in
  ``iaa.Sometimes(0.25, ...)``.
  """
  def __init__(self):
    blur = iaa.Sometimes(0.25, iaa.GaussianBlur(sigma=(0, 1.0)))
    dropout = iaa.Sometimes(0.25, iaa.CoarseDropout(0.3, size_percent=0.2))
    self.aug = iaa.Sequential([blur, dropout])

  def __call__(self, img):
    # imgaug operates on arrays, so convert whatever was passed in first.
    pixels = np.array(img)
    augmented = self.aug.augment_image(pixels)
    return augmented

In [0]:
data_dir = './bird_data'
image_size = 320  # or 224

# Training pipeline: random augmentation, then resize + normalisation.
# NOTE(review): newer torchvision releases renamed RandomRotation's
# `resample` argument to `interpolation` - confirm against the installed
# version before upgrading.
data_transforms = transforms.Compose([
    ImgAugTransform(),
    transforms.ToPILImage(),
    # transforms.Lambda(make_image_square),
    transforms.Resize((image_size, image_size)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20, resample=Image.BILINEAR),
    transforms.ToTensor(),  # to range [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Validation pipeline: same resize + normalisation but NO random augmentation,
# so the validation accuracy (and therefore the choice of the best checkpoint)
# is deterministic and measured on undistorted images.
val_transforms = transforms.Compose([
    # transforms.Lambda(make_image_square),
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),  # to range [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

batch_size = 4


# ImageFolder derives the class of each image from its parent directory name.
train_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(data_dir + '/train_images',
                         transform=data_transforms),
    batch_size=batch_size, shuffle=True, num_workers=1)

val_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(data_dir + '/val_images',
                         transform=val_transforms),
    batch_size=batch_size, shuffle=False, num_workers=1)

# Model

In [0]:
nclasses = 20

class Resnet(nn.Module):
  """
  ImageNet-pretrained ResNet50 feature extractor followed by a small
  classification head: Dropout -> Linear(., 1024) -> ReLU -> Dropout ->
  Linear(1024, nclasses).

  ``forward`` returns raw, unnormalised class scores (logits) of shape
  (batch, nclasses), which is what ``nn.CrossEntropyLoss`` expects.
  """
  
  def __init__(self):
    super(Resnet, self).__init__()
    resnet = resnet50(pretrained=True)
    modules = list(resnet.children())[:-1]  # remove the last layer
    self.resnet = nn.Sequential(*modules)
    
    self.linear1 = nn.Linear(resnet.fc.in_features, 1024)
    self.linear2 = nn.Linear(1024, nclasses)
    self.dropout1 = nn.Dropout(p=0.8)
    self.dropout2 = nn.Dropout(p=0.5)
    
  def forward(self, x):
    """Map a batch of images to a (batch, nclasses) tensor of logits."""
    x = self.resnet(x)
    x = x.view(x.size(0), -1)  # flatten the backbone output to (batch, features)

    x = self.dropout1(x)
    x = F.relu(self.linear1(x))
    
    x = self.dropout2(x)
    # No activation on the output layer: a ReLU here would clamp every
    # negative logit to 0 before the softmax inside CrossEntropyLoss.
    x = self.linear2(x)

    return x

In [0]:
class Net(nn.Module):
  """
  Classifier with a selectable ImageNet-pretrained backbone
  ('resnet50', 'resnet152' or 'resnext101_32x8d') followed by the head
  Dropout -> Linear(., 1024) -> ReLU -> Dropout -> Linear(1024, nclasses).

  ``forward`` returns raw class scores (logits) of shape (batch, nclasses),
  suitable for ``nn.CrossEntropyLoss``.

  Raises
  ------
  ValueError
    If ``backbone_net`` is not one of the supported names.
  """
  
  def __init__(self, backbone_net):
    super(Net, self).__init__()

    self.backbone_name = backbone_net
    if backbone_net == 'resnet50':
      backbone = resnet50(pretrained=True)
    elif backbone_net == 'resnet152':
      backbone = resnet152(pretrained=True)
    elif backbone_net == 'resnext101_32x8d':
      # Not part of the notebook's top-level imports, so import it here.
      from torchvision.models import resnext101_32x8d
      backbone = resnext101_32x8d(pretrained=True)
    else:
      raise ValueError(
          "Unknown backbone '{}': expected 'resnet50', 'resnet152' or "
          "'resnext101_32x8d'.".format(backbone_net))

    modules = list(backbone.children())[:-1]  # drop the backbone's own fc layer
    self.backbone = nn.Sequential(*modules)
    
    self.linear1 = nn.Linear(backbone.fc.in_features, 1024)
    self.linear2 = nn.Linear(1024, nclasses)
    self.dropout1 = nn.Dropout(p=0.9)
    self.dropout2 = nn.Dropout(p=0.4)

    # Not used by forward(); it references the same layer objects as above.
    # Kept so the state_dict keys stay identical for existing checkpoints.
    self.classifier = nn.Sequential(self.dropout1, self.linear1, self.dropout2, self.linear2)
    
  def forward(self, x):
    """Map a batch of images to a (batch, nclasses) tensor of logits."""
    x = self.backbone(x)
    x = x.view(x.size(0), -1)  # flatten the backbone output to (batch, features)

    x = self.dropout1(x)
    x = F.relu(self.linear1(x))
    x = self.dropout2(x)
    # No activation on the output layer: a ReLU here would clamp every
    # negative logit to 0 before the softmax inside CrossEntropyLoss.
    x = self.linear2(x)

    return x

In [0]:
def _train_one_epoch(model, train_loader, criterion, optimizer, use_cuda, epoch, log_interval):
  """Run one optimisation pass over train_loader, logging every log_interval batches."""
  model.train()
  for batch_idx, (inputs, target) in enumerate(train_loader):
    if use_cuda:
      inputs, target = inputs.cuda(), target.cuda()
    optimizer.zero_grad()
    loss = criterion(model(inputs), target)
    loss.backward()
    optimizer.step()
    if batch_idx % log_interval == 0:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(inputs), len(train_loader.dataset),
            100. * batch_idx / len(train_loader), loss.item()))


def _evaluate(model, val_loader, criterion, use_cuda):
  """Return (average per-sample loss, number of correct predictions) on val_loader."""
  model.eval()
  # A mean-reduced criterion returns one average per batch; weight it by the
  # batch size so that dividing by the dataset size gives a true per-sample
  # average even when the last batch is smaller.
  mean_reduced = getattr(criterion, 'reduction', 'mean') == 'mean'
  total_loss = 0.0
  correct = 0
  with torch.no_grad():  # no gradients needed: saves memory and time
    for inputs, target in val_loader:
      if use_cuda:
        inputs, target = inputs.cuda(), target.cuda()
      output = model(inputs)
      batch_loss = criterion(output, target).item()
      if mean_reduced:
        batch_loss *= inputs.size(0)
      total_loss += batch_loss
      # index of the highest score = predicted class
      pred = output.max(1, keepdim=True)[1]
      correct += pred.eq(target.view_as(pred)).sum().item()
  return total_loss / len(val_loader.dataset), correct


def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, use_cuda, model_file, nepochs=50, log_interval=10):
  """
  Train a model, validate it after every epoch and keep the best weights.

  After each epoch the current weights are written to ``model_file``; once
  training is over the weights with the highest validation accuracy are
  loaded back into ``model`` and written to ``model_file`` again.

  Parameters
  ----------
  model: nn.Module to train (updated in place).
  train_loader, val_loader: iterables of (inputs, target) batches exposing
    a ``dataset`` attribute with a length.
  criterion: loss callable taking (output, target).
  optimizer: optimizer stepped once per training batch.
  scheduler: learning-rate scheduler stepped once per epoch.
  use_cuda: if True, every batch is moved to the GPU.
  model_file: path the state_dict is saved to.
  nepochs: number of epochs.
  log_interval: print the training loss every ``log_interval`` batches.

  Returns
  -------
  model: the input model, holding the best validation weights.
  val_acc_history: list of float, validation accuracy (in %) of each epoch.
  """

  since = time.time()
  val_acc_history = []

  # keep the best model
  best_model_wts = copy.deepcopy(model.state_dict())
  best_acc = 0.0

  for epoch in range(nepochs):
    _train_one_epoch(model, train_loader, criterion, optimizer, use_cuda, epoch, log_interval)

    validation_loss, correct = _evaluate(model, val_loader, criterion, use_cuda)
    validation_accuracy = 100. * correct / len(val_loader.dataset)
    val_acc_history.append(validation_accuracy)

    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
          validation_loss, correct, len(val_loader.dataset),
          validation_accuracy))

    if validation_accuracy > best_acc:
      best_acc = validation_accuracy
      best_model_wts = copy.deepcopy(model.state_dict())

    scheduler.step()

    # checkpoint the latest weights so an interrupted run is not lost
    torch.save(model.state_dict(), model_file)
    print('Saved model to ' + model_file + '.\n')

  time_elapsed = time.time() - since
  print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
  print('Best val Acc: {:4f}'.format(best_acc))

  # load best model weights
  model.load_state_dict(best_model_wts)
  torch.save(model.state_dict(), model_file)
  print('Saved model to ' + model_file + '.\n')
  
  return model, val_acc_history

# Training

In [0]:
use_cuda = torch.cuda.is_available()

# initialization
model = Resnet()
if use_cuda:
  model = model.cuda()

# parameters
model_file = 'experiment/resnet50_{}.pth'.format(image_size)
nepochs = 50
log_interval = 10

# make sure the checkpoint directory exists before the first save
os.makedirs(os.path.dirname(model_file), exist_ok=True)

# optimization
criterion = nn.CrossEntropyLoss()
# Fine-tune the pretrained backbone gently (lr 1e-5) and train the freshly
# initialised head with the default lr (1e-4). Every trainable layer must
# appear in a parameter group, otherwise the optimizer never updates it.
optimizer = optim.Adam([
                {'params': model.resnet.parameters(), 'lr': 1e-5},
                {'params': model.linear1.parameters()},
                {'params': model.linear2.parameters()},
               ], lr=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=40, gamma=0.1)

# training loop
model, val_acc_history = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, use_cuda, model_file, nepochs, log_interval)

# Evaluate

In [0]:
test_dir = data_dir + '/test_images/mistery_category'
# Follow image_size, like model_file does, so that predictions produced with
# another resolution do not overwrite (or get mistaken for) the 320 ones.
outfile = 'experiment/resnet50_{}.csv'.format(image_size)


# Deterministic preprocessing for inference: resize + normalisation only,
# no random augmentation.
test_transforms = transforms.Compose([
    # transforms.Lambda(make_image_square),
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])


def pil_loader(path):
    """Read the image file at ``path`` and return it converted to RGB."""
    # The file is opened explicitly (instead of handing the path to PIL) to
    # avoid a ResourceWarning, see
    # https://github.com/python-pillow/Pillow/issues/835
    with open(path, 'rb') as handle, Image.open(handle) as picture:
        rgb = picture.convert('RGB')
    return rgb


# Write one "Id,Category" row per test image, using the trained model.
model.eval()
os.makedirs(os.path.dirname(outfile) or '.', exist_ok=True)
# `with` guarantees the CSV is flushed and closed even if inference fails.
with open(outfile, "w") as output_file:
    output_file.write("Id,Category\n")
    with torch.no_grad():  # inference only: no autograd graph needed
        # sorted() makes the row order reproducible (os.listdir order is arbitrary)
        for filename in tqdm(sorted(os.listdir(test_dir))):
            # Match the extension itself, so that stripping the last four
            # characters below always yields the image id.
            if not filename.endswith('.jpg'):
                continue

            # Named image_tensor (not `data`) so the `data` module alias
            # imported from torch.utils is not overwritten.
            image_tensor = test_transforms(pil_loader(test_dir + '/' + filename))
            image_tensor = image_tensor.unsqueeze(0)  # add the batch dimension

            if use_cuda:
                image_tensor = image_tensor.cuda()

            output = model(image_tensor)
            pred = output.max(1, keepdim=True)[1]  # index of the best-scoring class
            output_file.write("%s,%d\n" % (filename[:-4], pred.item()))

print("Succesfully wrote " + outfile + ', you can upload this file to the kaggle competition website')