# Preprocessing:

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import argparse
import os
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets
from torch.autograd import Variable
from tqdm import tqdm
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import zipfile
import os
import torchvision.transforms as transforms
import argparse
from tqdm import tqdm
import os
import PIL.Image as Image
import torch
import torchvision.models as models
import skimage.io as io
from skimage.transform import rotate, AffineTransform, warp
from skimage.util import random_noise
from skimage.filters import gaussian
from torch.utils.data import TensorDataset, DataLoader
from PIL import Image
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torchvision
import cv2
import natsort
from functools import partial
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
from google.colab import drive
# Mount Google Drive under /content/drive; the default dataset and experiment
# paths configured below live under that mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Run configuration. The options are declared like a command-line interface
# but parsed from an empty argument list, so inside the notebook the defaults
# below are always the values in effect.
parser = argparse.ArgumentParser(description='RecVis A3 training script')
for _flag, _type, _default, _metavar in (
    ('--data', str, '/content/drive/My Drive/bird_dataset/', 'D'),
    ('--batch-size', int, 16, 'B'),
    ('--epochs', int, 20, 'N'),
    ('--lr', float, 0.0001, 'LR'),
    ('--momentum', float, 0.5, 'M'),
    ('--seed', int, 1, 'S'),
    ('--log-interval', int, 10, 'N'),
    ('--experiment', str, '/content/drive/My Drive/bird_dataset/experiment/', 'E'),
):
    parser.add_argument(_flag, type=_type, default=_default, metavar=_metavar)
del _flag, _type, _default, _metavar

args = parser.parse_args([])
use_cuda = torch.cuda.is_available()
torch.manual_seed(args.seed)

ds_directory = args.data

# Create experiment folder
os.makedirs(args.experiment, exist_ok=True)

## Data augmentation:


### Some customized preprocessing functions :

In [None]:
def square_padding(image):
    """Pad an image with black borders so that it becomes square.

    The input is converted to a ``uint8`` array and centred on a zero-filled
    canvas whose side is the longer of the two image sides, so that a later
    resize to a square does not deform the content. Inputs that are already
    square are returned without padding. The canvas is allocated with three
    channels, so non-square inputs are expected to be H x W x 3.

    Returns a PIL image.
    """
    pixels = np.array(image).astype(np.uint8)
    height, width = pixels.shape[0], pixels.shape[1]

    # Nothing to pad.
    if height == width:
        return Image.fromarray(pixels)

    side = max(height, width)
    canvas = np.zeros((side, side, 3), np.uint8)
    if height > width:
        # Portrait: add columns left and right.
        offset = (height - width) / 2
        canvas[:, int(offset):int(offset + width), :] = pixels
    else:
        # Landscape: add rows above and below.
        offset = (width - height) / 2
        canvas[int(offset):int(offset + height), :, :] = pixels
    return Image.fromarray(canvas)

def gaussian_blur(img):
    """Smooth an image with OpenCV's Gaussian filter.

    The input is converted to a NumPy array and blurred with a 65x65 kernel
    and sigmaX = 1. Note that the blurred NumPy array is returned, not a PIL
    image.
    """
    return cv2.GaussianBlur(np.array(img), (65, 65), 1)


def crop(image, precentage=0.1):
    """Crop the borders of an image, keeping its centre.

    Args:
        image: PIL image (or array-like) whose first two axes are height and
            width.
        precentage: total fraction of the height and of the width to remove;
            half of it is trimmed from each side. (The misspelled name is kept
            so that existing keyword callers keep working.)

    Returns:
        PIL image of size ``int((1 - precentage) * H)`` by
        ``int((1 - precentage) * W)``.
    """
    pixels = np.array(image).astype(np.uint8)
    height, width = pixels.shape[0], pixels.shape[1]
    t_height = int((1 - precentage) * height)
    t_width = int((1 - precentage) * width)

    # The offsets must be measured on the ORIGINAL size. The previous version
    # derived both slice bounds from the target size, which produced a window
    # that was off-centre (shifted towards the top-left corner) and smaller
    # than the intended t_height x t_width.
    top = (height - t_height) // 2
    left = (width - t_width) // 2
    return Image.fromarray(pixels[top:top + t_height, left:left + t_width])

### Transformations

In [None]:
# Augmentation pipeline. Its output is converted back to a PIL image and
# written to disk as a JPEG by the saving loop, so it must produce a plain
# (un-normalised) image tensor with values in [0, 1].
transforms_for_augumentation = transforms.Compose([
    square_padding,
    gaussian_blur,
    # gaussian_blur returns a NumPy array; the transforms below need a PIL
    # image, so convert back before going on.
    transforms.ToPILImage(),
    transforms.Resize((320,320)),
    transforms.CenterCrop(300),
    # Random geometric augmentations are applied on the PIL image, i.e. before
    # ToTensor, so the areas they expose are filled with real black pixels.
    transforms.RandomAffine(20, translate=(0.1,0.1), scale=None, shear=None),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomPerspective(),
    transforms.ToTensor(),
    # No Normalize here on purpose: ToPILImage on a mean/std-normalised tensor
    # scales by 255 and casts to bytes, which corrupts the colours of the
    # saved image. Normalisation is applied at training time instead.
    ])

In [None]:
"""Saving transformed data"""

# Every training image is passed once through the augmentation pipeline and
# the result is written into the folder of its class, next to the originals.
dir_folder = os.listdir(ds_directory+'/train_images')
images = datasets.ImageFolder(f'{ds_directory}/train_images',transform = transforms_for_augumentation)
# Naturally-sorted folder names, indexed by the integer label of each sample.
dir_folder = natsort.natsorted(dir_folder,reverse=False)
# The loop previously iterated over an undefined name `data`; the dataset
# built above is `images`.
for ind in range(len(images)):
    img, label = images[ind]
    img = transforms.ToPILImage()(img)
    target_dir = f'{ds_directory}/train_images/{dir_folder[label]}'
    # Create the folder up front instead of retrying after a failed save.
    os.makedirs(target_dir, exist_ok=True)
    img.save(fp=os.path.join(target_dir, f'{ind:02d}-aug-{int(label):02d}.jpg'))


## Applying Faster R-CNN to crop images

In [None]:
# Class names indexed by the integer label ids found in the detector output;
# detect_bird uses this table to find the 'bird' detections. 'N/A' entries
# are placeholders for unused label ids.
coco_names = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]


def pil_loader(path):
    """Load the image stored at ``path`` and return it converted to RGB.

    The file is opened explicitly and handed to PIL so that the handle is
    closed when the block exits (avoids the ResourceWarning discussed in
    https://github.com/python-pillow/Pillow/issues/835).
    """
    with open(path, 'rb') as handle, Image.open(handle) as picture:
        return picture.convert('RGB')



def detect_bird(image, model, detection_threshold):
    """Crop an image tensor around a detected bird.

    Args:
        image: image tensor; a batch dimension is added before it is passed to
            ``model`` (presumably C x H x W with values in [0, 1] -- TODO
            confirm against the caller's transform).
        model: detection model returning, per image, a dict with ``'labels'``,
            ``'scores'`` and ``'boxes'`` entries. It is switched to eval mode.
        detection_threshold: minimum score the bird detection must reach.

    Returns:
        PIL image. The crop of the box of the first 'bird' entry in the model
        output when its score is at least ``detection_threshold``; otherwise
        the whole, un-cropped image.
    """
    model.eval()

    # Inference only: disabling autograd avoids building a graph for every
    # image, which would waste memory for no benefit.
    with torch.no_grad():
        prediction = model(image.unsqueeze(0))[0]

    full_image = transforms.ToPILImage()(image)

    pred_classes = [coco_names[i] for i in prediction['labels'].cpu().numpy()]
    if 'bird' not in pred_classes:
        return full_image

    index = pred_classes.index('bird')
    if prediction['scores'].cpu().numpy()[index] < detection_threshold:
        return full_image

    # Box coordinates truncated to integers and handed to PIL as plain ints.
    box = prediction['boxes'].cpu().numpy()[index].astype(np.int32)
    return full_image.crop(tuple(int(v) for v in box))


In [None]:
# Pre-trained detector used to locate the bird in each image before cropping.
FRCNN = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Detector input: square-padded, resized to 400x400 and converted to a tensor
# (no normalisation is applied here).
data_transforms_FRCNN = transforms.Compose(
    [square_padding, transforms.Resize((400, 400)), transforms.ToTensor()]
)


""" Saving cropped test images"""

ds_directory = args.data
dir_folder = os.listdir(ds_directory+'/test_images/mistery_category')
dir_folder = natsort.natsorted(dir_folder,reverse=False)

# Create the destination (including the missing `test_crp` parent) once, up
# front. The previous save/except/os.mkdir fallback could not create nested
# folders and raised from inside the except handler on a fresh dataset.
_test_crop_dir = f'{ds_directory}/test_crp/mistery_category/'
os.makedirs(_test_crop_dir, exist_ok=True)

for f in dir_folder:
    if 'jpg' in f:
        img = data_transforms_FRCNN(pil_loader(f'{ds_directory}/test_images/mistery_category/' + '/' + f))
        # Keep only the bird when the detector is confident enough (>= 0.7).
        img = detect_bird(img, FRCNN, 0.7)
        img.save(fp=os.path.join(_test_crop_dir, f'{f}'))


""" Saving cropped train images"""


dir_folder = os.listdir(ds_directory+'/train_images')
images = datasets.ImageFolder(f'{ds_directory}/train_images',transform = data_transforms_FRCNN)
# Naturally-sorted folder names, indexed by the integer label of each sample.
dir_folder = natsort.natsorted(dir_folder,reverse=False)
for ind in range(len(images)):
    img, label = images[ind]
    # Keep only the bird when the detector is confident enough (>= 0.7).
    img = detect_bird(img, FRCNN, 0.7)
    class_dir = f'{ds_directory}/train_crp/{dir_folder[label]}'
    # makedirs also creates the missing `train_crp` parent, which the previous
    # save/except/os.mkdir fallback could not do.
    os.makedirs(class_dir, exist_ok=True)
    img.save(fp=os.path.join(class_dir, f'{ind:02d}-crp-{int(label):02d}.jpg'))


## Using segmentation to remove the background

For this task we apply a Fully Convolutional Network (FCN) model with a ResNet-101 backbone to the cropped images.

In [None]:
# Pre-trained segmentation network, switched to inference mode.
FCNRES = torch.hub.load('pytorch/vision:v0.6.0', 'fcn_resnet101', pretrained=True)
FCNRES.eval()

# Tensor conversion followed by per-channel normalisation, applied to each
# image before it is fed to FCNRES.
preprocess = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# No transform is attached: the dataset yields the stored images themselves.
images = datasets.ImageFolder(f'{ds_directory}/train_crp')
dir_folder = natsort.natsorted(os.listdir(ds_directory+'/train_crp'), reverse=False)

for i in range(len(images)):
    # Black out the background of the i-th cropped image and save the result.
    image, label = images[i]

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = FCNRES(preprocess(image).unsqueeze(0))['out'][0]

    # Per-pixel class index; every non-zero class is kept as foreground
    # (presumably class 0 is the background class -- TODO confirm). OpenCV
    # needs the mask as a uint8 NumPy array, not an int64 torch tensor.
    mask = (output.argmax(0) > 0).cpu().numpy().astype(np.uint8)

    # OpenCV works on arrays, so convert the PIL image first and wrap the
    # masked pixels back into a PIL image so that it can be saved.
    pixels = np.array(image)
    res = Image.fromarray(cv2.bitwise_and(pixels, pixels, mask=mask))

    # Saving image. The file name uses this loop's index `i`; the previous
    # version used `ind`, which is not defined by this loop.
    class_dir = f'{ds_directory}/train_wb/{dir_folder[label]}'
    os.makedirs(class_dir, exist_ok=True)
    res.save(fp=os.path.join(class_dir, f'{i:02d}-w-{int(label):02d}.jpg'))


## Creating folds for training

In [None]:
# Compare the label distribution of the training and validation folders.
train = datasets.ImageFolder(f'{ds_directory}/train_images')
val = datasets.ImageFolder(f'{ds_directory}/val_images')
labels_train = train.targets
labels_val = val.targets

fig, ax = plt.subplots(2, 1, figsize=(8,6))

# One histogram per split, stacked vertically.
for _axis, _labels, _split in zip(ax, (labels_train, labels_val), ('train', 'validation')):
    _axis.hist(_labels, bins=20)
    _axis.set_ylabel(_split)
    _axis.set_xlabel('labels')
del _axis, _labels, _split
plt.show()

In [None]:
# Deterministic preprocessing shared by training, pseudo-labelling and test
# prediction: square padding, resize to 300x300, tensor conversion and
# per-channel normalisation.
data_transforms = transforms.Compose(
    [
        square_padding,
        transforms.Resize((300, 300)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Full labelled dataset from which the cross-validation folds are drawn.
all_data = datasets.ImageFolder(f'{ds_directory}/data_train/', transform=data_transforms)





def split_folds_dataset(dataset, n_folds=3):
    """Randomly partition ``dataset`` into cross-validation folds.

    Args:
        dataset: any object supporting ``len()`` and integer indexing.
        n_folds: number of folds (default 3, the original behaviour).

    Returns:
        dict with, for every fold ``i``, the held-out part under ``'val{i}'``
        and the concatenation of all the other parts under ``'train{i}'``.

    Raises:
        ValueError: if ``n_folds`` is smaller than 2 (there would be no
            training part).
    """
    if n_folds < 2:
        raise ValueError('n_folds must be at least 2')

    # random_split requires the lengths to sum to len(dataset). Truncating
    # len/3 three times does not when the size is not a multiple of 3, so the
    # remainder is spread over the first folds instead.
    base, extra = divmod(len(dataset), n_folds)
    lengths = [base + 1 if k < extra else base for k in range(n_folds)]
    parts = torch.utils.data.random_split(dataset, lengths)

    sets = {}
    for i in range(n_folds):
        others = [parts[j] for j in range(n_folds) if j != i]
        sets[f'train{i}'] = torch.utils.data.ConcatDataset(others)
        sets[f'val{i}'] = parts[i]
    return sets


def train_val_dataset(dataset, val_split=0.1):
    """Shuffle the indices of ``dataset`` and split them into two subsets.

    ``val_split`` is forwarded to ``train_test_split`` as ``test_size``.
    Returns a dict holding the two ``Subset`` views under the keys ``'train'``
    and ``'val'``.
    """
    train_idx, val_idx = train_test_split(
        list(range(len(dataset))), test_size=val_split, shuffle=True
    )
    return {
        'train': torch.utils.data.Subset(dataset, train_idx),
        'val': torch.utils.data.Subset(dataset, val_idx),
    }


# Train/validation folds; the training loop picks folds['train{i}'] and
# folds['val{i}'] for the i-th model.
folds = split_folds_dataset(all_data)

# Model: 


In [None]:
nclasses = 20


class Net(nn.Module):
    """Pre-trained backbone followed by a new two-layer classification head.

    Args:
        pretrained_model: network exposing an ``fc`` layer; all of its child
            modules except the last one are reused as the feature extractor.
        num_model: not used by the class; kept so that existing callers keep
            working.
    """

    def __init__(self, pretrained_model, num_model):
        super(Net, self).__init__()
        # Every child of the backbone except the last one.
        self.pretrained = torch.nn.Sequential(*list(pretrained_model.children())[:-1])
        self.input_size = pretrained_model.fc.in_features
        # The order of the layers is left untouched so that the keys of the
        # state dict (fc.1.*, fc.4.*) stay compatible with saved checkpoints.
        # The output size now comes from `nclasses` instead of a second
        # hard-coded 20.
        self.fc = nn.Sequential(
            nn.Dropout(p=0.6),
            nn.Linear(self.input_size, 1000),
            nn.Dropout(p=0.3),
            nn.ReLU(inplace=True),
            nn.Linear(1000, nclasses),
        )

    def forward(self, x):
        """Return the class scores, one row per input sample."""
        x = self.pretrained(x)
        # Flatten everything but the batch dimension for the linear head.
        x = x.view(x.size()[0], -1)
        return self.fc(x)

# Training :

In [None]:


def train(epoch):
    """Run one training epoch over ``train_loader``.

    Works on the module-level ``model``, ``train_loader``, ``optimizer``,
    ``use_cuda`` and ``args`` set up by the training loop. ``epoch`` is only
    used in the progress message, printed every ``args.log_interval`` batches.
    """
    model.train()
    # Built once per epoch rather than once per batch. The loss has no weights
    # of its own, so it does not need to be moved to the GPU.
    criterion = torch.nn.CrossEntropyLoss(reduction='mean')
    for batch_idx, (data, target) in enumerate(train_loader):
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    
            


def validation():
    """Evaluate the current model on ``val_loader`` and checkpoint it on improvement.

    Works on the module-level ``model``, ``val_loader``, ``use_cuda``,
    ``args`` and ``num_model``. When the accuracy beats the global
    ``best_score_val``, the state dict is saved to
    ``<args.experiment>/model_<num_model>.pth`` and ``best_score_val`` is
    updated. Prints the best score, the average loss and the accuracy.
    """
    global best_score_val
    model.eval()
    # Summed (not averaged) per batch, so that dividing by the number of
    # samples below yields a true per-sample average. The previous version
    # divided a sum of per-batch means by the dataset size, which understated
    # the loss by roughly the batch size.
    criterion = torch.nn.CrossEntropyLoss(reduction='sum')
    validation_loss = 0
    correct = 0
    # Evaluation only: no gradients are needed.
    with torch.no_grad():
        for data, target in val_loader:
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            validation_loss += criterion(output, target).item()
            # index of the highest score = predicted class
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    n_samples = len(val_loader.dataset)
    validation_loss /= n_samples
    accuracy = 100. * correct / n_samples

    if accuracy > best_score_val:
        model_file = args.experiment + f'/model_{num_model}.pth'
        torch.save(model.state_dict(), model_file)
        print('model saved to',model_file)
        best_score_val = float(accuracy)
    print("Best validation score", best_score_val)
    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        validation_loss, correct, n_samples, accuracy))
 

In [None]:
# Set to True to resume every model from its saved checkpoint.
load = False

# One backbone per model; the i-th model is trained on the i-th fold.
backbones = (models.resnet152, models.resnext101_32x8d, models.wide_resnet101_2)

for num_model, build_backbone in enumerate(backbones):
    print("training model:",num_model)
    best_score_val = 0

    pre_model = build_backbone(pretrained=True)

    # Freeze the first five child modules of the backbone; only the later
    # ones and the new head are fine-tuned.
    for ct, child in enumerate(pre_model.children(), start=1):
        if ct < 6:
            for param in child.parameters():
                param.requires_grad = False

    model = Net(pre_model,num_model)

    if load:
        # Load the checkpoint into the model built above. The previous version
        # rebuilt a fresh network here, which silently dropped the freezing
        # applied just before. map_location lets a checkpoint saved on GPU be
        # restored on a CPU-only machine.
        checkpoint = os.path.join(args.experiment, f'model_{num_model}.pth')
        model.load_state_dict(torch.load(checkpoint, map_location='cpu'))

    if use_cuda:
        print('Using GPU')
        model.cuda()
    else:
        print('Using CPU')

    train_loader = torch.utils.data.DataLoader(folds[f'train{num_model}'],batch_size=args.batch_size, shuffle=True, num_workers=1)
    val_loader = torch.utils.data.DataLoader(folds[f'val{num_model}'] ,batch_size=args.batch_size, shuffle=False, num_workers=1)

    # Learning rate and number of epochs come from the configuration; their
    # defaults (0.0001 and 20) are the values that used to be hard-coded here.
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # The learning rate is multiplied by 0.75 after every epoch.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.75)

    for epoch in range(args.epochs):

        for param_group in optimizer.param_groups:
            print("Current learning rate is: {}".format(param_group['lr']))

        train(epoch)
        validation()
        scheduler.step()
     


training model: 0
Using GPU
Current learning rate is: 0.0001
model_saved to /content/drive/My Drive/bird_dataset/experiment//model_0.pth
Best validation score 81.31731414794922

Validation set: Average loss: 0.0384, Accuracy: 1963/2414 (81%)
Current learning rate is: 7.500000000000001e-05
model_saved to /content/drive/My Drive/bird_dataset/experiment//model_0.pth
Best validation score 87.90389251708984

Validation set: Average loss: 0.0218, Accuracy: 2122/2414 (88%)
Current learning rate is: 5.6250000000000005e-05
model_saved to /content/drive/My Drive/bird_dataset/experiment//model_0.pth
Best validation score 90.1822738647461

Validation set: Average loss: 0.0178, Accuracy: 2177/2414 (90%)
Current learning rate is: 4.21875e-05
model_saved to /content/drive/My Drive/bird_dataset/experiment//model_0.pth
Best validation score 92.08782196044922

Validation set: Average loss: 0.0159, Accuracy: 2223/2414 (92%)
Current learning rate is: 3.1640625e-05
model_saved to /content/drive/My Drive/bi

In [None]:
# Extra option for the prediction step: where the submission file is written.
parser.add_argument(
    '--outfile',
    type=str,
    default='/content/drive/My Drive/bird_dataset/experiment/kaggle.csv',
    metavar='D',
    help="name of the output csv file",
)

# Re-parse (again from an empty argument list) so that `args` has `outfile`.
args = parser.parse_args([])
use_cuda = torch.cuda.is_available()

def load_models():
    """Rebuild the three networks and load their trained weights.

    Returns:
        tuple ``(model1, model2, model3)``: the ResNet-152, ResNeXt-101 32x8d
        and Wide-ResNet-101-2 based ``Net`` instances, restored from
        ``model_0.pth``, ``model_1.pth`` and ``model_2.pth`` in the experiment
        folder.
    """
    dir_exp = "/content/drive/My Drive/bird_dataset/experiment/"
    backbones = (models.resnet152, models.resnext101_32x8d, models.wide_resnet101_2)

    loaded = []
    for num_model, build_backbone in enumerate(backbones):
        # The checkpoint holds every weight of the network (strict loading),
        # so downloading the ImageNet weights first would be wasted work.
        net = Net(build_backbone(pretrained=False), num_model)
        # os.path.join builds the three paths the same way. map_location lets
        # checkpoints saved on GPU load on a CPU-only machine; the caller
        # moves the models to the GPU afterwards when one is available.
        checkpoint = os.path.join(dir_exp, f"model_{num_model}.pth")
        net.load_state_dict(torch.load(checkpoint, map_location='cpu'))
        loaded.append(net)

    model1, model2, model3 = loaded
    return model1, model2, model3

model1,model2,model3 = load_models()

# Inference mode for the three models.
for _net in (model1, model2, model3):
    _net.eval()

if not use_cuda:
    print('Using CPU')
else:
    print('Using GPU')
    for _net in (model1, model2, model3):
        _net.cuda()
del _net

Using GPU


# Pseudo labeling :

In [None]:

def pil_loader(path):
    """Load the image stored at ``path`` and return it converted to RGB.

    The file is opened explicitly and handed to PIL so that the handle is
    closed when the block exits (avoids the ResourceWarning discussed in
    https://github.com/python-pillow/Pillow/issues/835).
    """
    with open(path, 'rb') as handle, Image.open(handle) as picture:
        return picture.convert('RGB')


def pseudo_labeling(ds_directory,names_dir,origin_dir,saving_folder):
    """Save confidently-classified images into folders named after their predicted class.

    Every file of ``ds_directory + origin_dir`` whose name contains 'jpg' is
    classified with the three module-level models (their scores are added).
    When the highest softmax probability exceeds 0.99, the original image is
    saved under ``<ds_directory>/<saving_folder>/<class folder>/``.

    Args:
        ds_directory: root folder of the dataset.
        names_dir: sub-path whose naturally-sorted entries give the class
            folder names, indexed by predicted class.
        origin_dir: sub-path of the images to label. File names are appended
            by plain concatenation, so it must end with a path separator.
        saving_folder: sub-folder that receives the pseudo-labelled images.
    """
    dir_folder = natsort.natsorted(os.listdir(ds_directory+names_dir), reverse=False)

    for f in tqdm(os.listdir(ds_directory+origin_dir)):
        if 'jpg' not in f:
            continue

        img = pil_loader(ds_directory+origin_dir+f)
        # Add the batch dimension expected by the models.
        data = data_transforms(img).unsqueeze(0)
        if use_cuda:
            data = data.cuda()

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            output = model1(data) + model2(data) + model3(data)

        pred = int(output.argmax(dim=1))

        # pseudo-label only images with level of confidence p > 0.99
        # (softmax over the class dimension, given explicitly rather than
        # relying on the deprecated implicit choice).
        confidence = float(torch.softmax(output, dim=1).max())
        if confidence > 0.99:
            target_dir = f'{ds_directory}/{saving_folder}/{dir_folder[pred]}/'
            # makedirs also creates a missing `saving_folder` parent, which the
            # previous save/except/os.mkdir fallback could not do.
            os.makedirs(target_dir, exist_ok=True)
            img.save(fp=os.path.join(target_dir, f))
             

# Predicting test labels:

In [None]:
# Folder holding the cropped test images to classify.
test_dir = f"{args.data}/test_crp/mistery_category"


def pil_loader(path):
    """Load the image stored at ``path`` and return it converted to RGB.

    The file is opened explicitly and handed to PIL so that the handle is
    closed when the block exits (avoids the ResourceWarning discussed in
    https://github.com/python-pillow/Pillow/issues/835).
    """
    with open(path, 'rb') as handle, Image.open(handle) as picture:
        return picture.convert('RGB')




def predict_test():
    """Classify every test image and write the predictions to ``args.outfile``.

    The output is a CSV file with the header ``Id,Category`` and one line per
    file of ``test_dir`` whose name contains 'jpg': the file name without its
    last four characters, and the class with the highest summed score of the
    three module-level models.
    """
    # The context manager closes the file even if a prediction fails midway.
    with open(args.outfile, "w") as output_file:
        output_file.write("Id,Category\n")
        for f in tqdm(os.listdir(test_dir)):
            if 'jpg' not in f:
                continue
            # Add the batch dimension expected by the models.
            data = data_transforms(pil_loader(test_dir + '/' + f)).unsqueeze(0)
            if use_cuda:
                data = data.cuda()
            # Inference only: no autograd graph is needed.
            with torch.no_grad():
                output = model1(data) + model2(data) + model3(data)
            # Plain int, so that the %d formatting below does not depend on
            # tensor-to-number conversion.
            pred = int(output.argmax(dim=1))
            output_file.write("%s,%d\n" % (f[:-4], pred))

    print("Succesfully wrote " + args.outfile + ', you can upload this file to the kaggle competition website')



In [None]:
# Write the submission file for the test images (path taken from args.outfile).
predict_test()

100%|██████████| 517/517 [00:43<00:00, 11.97it/s]

Succesfully wrote /content/drive/My Drive/bird_dataset/experiment/kaggle3.csv, you can upload this file to the kaggle competition website



