## Reference:
* https://www.kaggle.com/phhasian0710/create-bounding-box-images-whale-recognition
* https://www.kaggle.com/satian/seresnext101-pytorch-starter
* https://www.kaggle.com/martinpiotte/bounding-box-model

* Experimented with ResNet-18 instead of the SE-ResNeXt101 used by the original author (score: 0.292).
* Experimented with Xception too (score: 0.352).


In [None]:
%matplotlib inline
#%reload_ext autoreload        <------------— comment out 
#%autoreload 0   

import os
import warnings
import pandas as pd
import numpy as np
from tqdm import tqdm
from matplotlib import pyplot as plt
from PIL import Image
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import transforms


import cv2
from sklearn.preprocessing import LabelEncoder, OneHotEncoder


warnings.filterwarnings("ignore",category=DeprecationWarning)

In [None]:
# Show what is available under the input directory (rendered as the cell output).
os.listdir('../input')

In [None]:
# --- File locations ---------------------------------------------------------
TRN_IMGS_DIR = '../input/humpback-whale-identification/train/'
TST_IMGS_DIR = '../input/humpback-whale-identification/test/'

BBOX_TRAIN_CSV = '../input/box-whale/bounding/bounding_boxes_train.csv'
BBOX_TEST_CSV = '../input/box-whale/bounding/bounding_boxes_test.csv'

best_model_path = '../input/bbox-seresnext101-pytorch-0-657/best_model.pth'

# --- Tables -----------------------------------------------------------------
train_df = pd.read_csv('../input/humpback-whale-identification/train.csv')
sub = pd.read_csv('../input/humpback-whale-identification/sample_submission.csv')

bbox_test = pd.read_csv(BBOX_TEST_CSV)
# Left-join the training boxes onto train_df by file name, so that row i of
# bbox_train describes the same image as row i of train_df.
bbox_train = train_df.join(
    pd.read_csv(BBOX_TRAIN_CSV).set_index('Image'),
    on='Image',
)

## Creating Labels

In [None]:
def prepare_labels(y):
    """One-hot encode a sequence of class labels.

    Adapted from https://www.kaggle.com/pestipeti/keras-cnn-starter

    Args:
        y: 1-D sequence of labels (anything ``np.array`` accepts).

    Returns:
        A ``(onehot, label_encoder)`` tuple. ``onehot`` is a dense float64
        array of shape ``(len(y), n_classes)`` with exactly one 1.0 per row.
        ``label_encoder`` is the fitted ``LabelEncoder``; column ``j`` of
        ``onehot`` corresponds to ``label_encoder.classes_[j]``.
    """
    values = np.array(y)
    label_encoder = LabelEncoder()
    class_idx = label_encoder.fit_transform(values)

    # Build the one-hot matrix with NumPy instead of OneHotEncoder: the
    # ``sparse=False`` keyword the original relied on was renamed to
    # ``sparse_output`` and later removed from scikit-learn, so that call
    # fails on current releases. The integer codes are already 0..n_classes-1,
    # so direct indexing yields the same dense float64 matrix.
    onehot = np.zeros((len(class_idx), len(label_encoder.classes_)))
    onehot[np.arange(len(class_idx)), class_idx] = 1.0
    return onehot, label_encoder

In [None]:
# One-hot targets for every training row, plus the encoder needed to turn
# predicted column indices back into whale Ids.
y, label_encoder = prepare_labels(train_df['Id'])
# Width of the one-hot matrix == number of distinct Ids.
NCLASSES = y.shape[1]

## Creating Augmentations

In [None]:
# Preprocessing constants shared by the train and test pipelines. The
# mean/std are the per-channel values commonly quoted for ImageNet.
_INPUT_SIZE = (224, 224)
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training: resize, random flips / rotation as augmentation, then convert to
# a normalised tensor.
trn_trnsfms = transforms.Compose([
    transforms.Resize(_INPUT_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomAffine(degrees=30),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

# Test: same resize and normalisation, no random augmentation.
tst_trnsfms = transforms.Compose([
    transforms.Resize(_INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

## Creating PyTorch Dataloader

In [None]:
class WhaleDataLoader(Dataset):
    """Dataset of whale images cropped to their bounding boxes.

    Despite its name this is a ``torch.utils.data.Dataset``; the caller wraps
    it in a ``DataLoader``.

    Args:
        image_folder: Directory that contains the image files.
        process: ``'train'`` or ``'test'``.
        bbox: DataFrame with one bounding-box row per sample. The crop
            rectangle is read from columns 2-5 of the row in train mode and
            from columns 1-4 in test mode, and is handed to ``PIL.Image.crop``
            as ``(left, upper, right, lower)``.
        df: Train mode only. DataFrame whose first column is the image file
            name; row ``i`` must describe the same sample as ``bbox`` row
            ``i`` and ``y[i]``.
        transform: Optional callable applied to the cropped PIL image.
        y: Train mode only. Per-sample labels, indexed like ``df``.

    Raises:
        ValueError: if ``process`` is unknown, ``bbox`` is missing, or ``df``
            is missing in train mode.
        KeyError: in test mode, if ``bbox`` has an ``Image`` column but lacks
            a row for some file found in ``image_folder``.
    """

    def __init__(self, image_folder, process='train', bbox=None, df=None, transform=None, y=None):
        if process not in ('train', 'test'):
            raise ValueError(f"process must be 'train' or 'test', got {process!r}")
        if bbox is None:
            raise ValueError('bbox is required: every image is cropped to its bounding box')
        if process == 'train' and df is None:
            raise ValueError("df is required when process='train'")

        self.image_folder = image_folder
        self.imgs_list = os.listdir(image_folder)
        self.process = process
        self.transform = transform
        self.y = y
        if self.process == 'train':
            self.df = df.values
        else:
            # Test samples are enumerated from the directory listing, whose
            # order is arbitrary; line the boxes up with it by file name.
            bbox = self._align_bbox(bbox, self.imgs_list)
        self.bbox = bbox.values

    @staticmethod
    def _align_bbox(bbox, image_names):
        """Return ``bbox`` reordered so that row ``i`` belongs to ``image_names[i]``."""
        if 'Image' not in bbox.columns:
            # Nothing to align on; keep the original positional pairing.
            return bbox
        # drop=False keeps 'Image' as a regular column so the positional
        # column offsets used in __getitem__ stay valid.
        by_name = bbox.drop_duplicates(subset='Image').set_index('Image', drop=False)
        missing = [name for name in image_names if name not in by_name.index]
        if missing:
            raise KeyError(
                f'no bounding box for {len(missing)} image(s), e.g. {missing[0]!r}'
            )
        return by_name.loc[image_names]

    def __len__(self):
        # Train samples are addressed through df, not through the directory
        # listing, so df defines the length there.
        if self.process == 'train':
            return len(self.df)
        return len(self.imgs_list)

    def __getitem__(self, idx):
        box = self.bbox[idx]
        if self.process == 'train':
            file_name = self.df[idx][0]
            label = self.y[idx]
            area = (box[2], box[3], box[4], box[5])
        else:
            file_name = self.imgs_list[idx]
            # Placeholder target so test batches have the same label shape.
            label = np.zeros((NCLASSES,))
            area = (box[1], box[2], box[3], box[4])

        img = Image.open(os.path.join(self.image_folder, file_name)).convert('RGB')
        img = img.crop(area)
        if self.transform is not None:
            img = self.transform(img)

        if self.process == 'train':
            return img, label
        return img, label, file_name

In [None]:
# Training set: file names come from train_df, crops from bbox_train and
# targets from the one-hot matrix y (all three share the same row order).
train_dataloader = WhaleDataLoader(
    image_folder=TRN_IMGS_DIR,
    process='train',
    bbox=bbox_train,
    df=train_df,
    transform=trn_trnsfms,
    y=y,
)
# Test set: no labels, no augmentation.
test_dataloader = WhaleDataLoader(
    image_folder=TST_IMGS_DIR,
    process='test',
    bbox=bbox_test,
    transform=tst_trnsfms,
)

In [None]:
batch_size = 16
num_workers = 4

# Options common to both loaders; only the training loader shuffles.
_loader_kwargs = dict(batch_size=batch_size, num_workers=num_workers, pin_memory=True)

train_loader = DataLoader(train_dataloader, shuffle=True, **_loader_kwargs)
test_loader = DataLoader(test_dataloader, **_loader_kwargs)

# Creating Xception Model


In [None]:
from __future__ import print_function, division, absolute_import
from collections import OrderedDict
import math

import torch.nn as nn
from torch.utils import model_zoo

# Download location and preprocessing metadata for pretrained weights,
# looked up as pretrained_settings[<model name>][<weights name>].
pretrained_settings = {
    'xception': {
        'imagenet': dict(
            url='http://data.lip6.fr/cadene/pretrainedmodels/xception-43020ad28.pth',
            input_space='RGB',
            input_size=[3, 299, 299],
            input_range=[0, 1],
            mean=[0.5, 0.5, 0.5],
            std=[0.5, 0.5, 0.5],
            num_classes=1000,
            # The resize parameter of the validation transform should be 333,
            # and make sure to center crop at 299x299
            scale=0.8975,
        ),
    },
}

class SeparableConv2d(nn.Module):
    """Depthwise convolution followed by a 1x1 pointwise convolution.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of channels produced by the pointwise step.
        kernel_size, stride, padding, dilation: Passed to the depthwise
            convolution only; the pointwise convolution is always 1x1,
            stride 1, unpadded.
        bias: Whether both convolutions carry a bias term.
    """

    def __init__(self,in_channels,out_channels,kernel_size=1,stride=1,padding=0,dilation=1,bias=False):
        super().__init__()
        # groups == in_channels gives every input channel its own spatial filter.
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=in_channels,
            bias=bias,
        )
        # 1x1 convolution that mixes channels and sets the output width.
        self.pointwise = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            dilation=1,
            groups=1,
            bias=bias,
        )

    def forward(self, x):
        return self.pointwise(self.conv1(x))

    
class Block(nn.Module):
    """Residual stack of ``reps`` (ReLU -> SeparableConv2d 3x3 -> BatchNorm) units.

    Args:
        in_filters: Channels of the input tensor.
        out_filters: Channels of the output tensor.
        reps: Number of separable-conv units in the main path.
        strides: If not 1, a 3x3 max-pool with this stride is appended to the
            main path and the shortcut convolution uses the same stride.
        start_with_relu: If False, the leading ReLU of the first unit is
            dropped; if True it is a non-inplace ReLU, so the block input
            that feeds the shortcut is left untouched.
        grow_first: If True the channel change happens in the first unit,
            otherwise in the last one.
    """

    def __init__(self,in_filters,out_filters,reps,strides=1,start_with_relu=True,grow_first=True):
        super().__init__()

        # A 1x1 projection is only needed when the shortcut has to change
        # channel count or spatial resolution.
        needs_projection = out_filters != in_filters or strides != 1
        if needs_projection:
            self.skip = nn.Conv2d(in_filters, out_filters, 1, stride=strides, bias=False)
            self.skipbn = nn.BatchNorm2d(out_filters)
        else:
            self.skip = None

        self.relu = nn.ReLU(inplace=True)

        # (input channels, output channels) of each separable conv, in order.
        if grow_first:
            plan = [(in_filters, out_filters)] + [(out_filters, out_filters)] * (reps - 1)
        else:
            plan = [(in_filters, in_filters)] * (reps - 1) + [(in_filters, out_filters)]

        layers = []
        for src, dst in plan:
            layers += [
                self.relu,
                SeparableConv2d(src, dst, 3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(dst),
            ]

        if start_with_relu:
            layers[0] = nn.ReLU(inplace=False)
        else:
            layers = layers[1:]

        if strides != 1:
            layers.append(nn.MaxPool2d(3, strides, 1))
        self.rep = nn.Sequential(*layers)

    def forward(self, inp):
        out = self.rep(inp)
        shortcut = inp if self.skip is None else self.skipbn(self.skip(inp))
        out += shortcut
        return out

class Xception(nn.Module):
    """
    Xception optimized for the ImageNet dataset, as specified in
    https://arxiv.org/pdf/1610.02357.pdf

    The classifier layer is created as ``fc``. The ``xception()`` factory
    loads pretrained weights under that name and then renames the layer to
    ``last_linear``; ``logits()`` accepts either name so that an instance
    built directly from this class is usable as well.
    """
    def __init__(self, num_classes=1000):
        """ Constructor
        Args:
            num_classes: number of classes
        """
        super().__init__()
        self.num_classes = num_classes

        # Stem: two plain 3x3 convolutions (the first one with stride 2).
        self.conv1 = nn.Conv2d(3, 32, 3, 2, 0, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(32, 64, 3, bias=False)
        self.bn2 = nn.BatchNorm2d(64)
        # ReLU is applied in features()

        # Three stride-2 blocks that widen 64 -> 128 -> 256 -> 728.
        self.block1 = Block(64, 128, 2, 2, start_with_relu=False, grow_first=True)
        self.block2 = Block(128, 256, 2, 2, start_with_relu=True, grow_first=True)
        self.block3 = Block(256, 728, 2, 2, start_with_relu=True, grow_first=True)

        # Eight identical 728-channel blocks, registered as block4 .. block11
        # so state-dict keys stay the same as with explicit attributes.
        for i in range(4, 12):
            setattr(self, f'block{i}', Block(728, 728, 3, 1, start_with_relu=True, grow_first=True))

        self.block12 = Block(728, 1024, 2, 2, start_with_relu=True, grow_first=False)

        self.conv3 = SeparableConv2d(1024, 1536, 3, 1, 1)
        self.bn3 = nn.BatchNorm2d(1536)
        # ReLU is applied in features()

        self.conv4 = SeparableConv2d(1536, 2048, 3, 1, 1)
        self.bn4 = nn.BatchNorm2d(2048)

        self.fc = nn.Linear(2048, num_classes)
        # No explicit weight initialisation is applied here; every layer
        # keeps PyTorch's default initialisation.

    def features(self, input):
        """Return the 2048-channel feature map (after bn4, before the final ReLU)."""
        x = self.relu(self.bn1(self.conv1(input)))
        x = self.relu(self.bn2(self.conv2(x)))

        for i in range(1, 13):
            x = getattr(self, f'block{i}')(x)

        x = self.relu(self.bn3(self.conv3(x)))
        x = self.bn4(self.conv4(x))
        return x

    def logits(self, features):
        """Apply ReLU, global average pooling and the classifier to ``features``.

        Note that ``self.relu`` is in-place, so ``features`` itself is modified.
        """
        x = self.relu(features)

        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(x.size(0), -1)
        # __init__ only creates `fc`; `last_linear` exists once the factory
        # has renamed it. Support both instead of failing with AttributeError.
        classifier = self.last_linear if hasattr(self, 'last_linear') else self.fc
        return classifier(x)

    def forward(self, input):
        x = self.features(input)
        x = self.logits(x)
        return x
    


# def initialize_pretrained_model(model, num_classes, settings):
#     assert num_classes == settings['num_classes'], \
#         'num_classes should be {}, but is {}'.format(
#             settings['num_classes'], num_classes)
#     model.load_state_dict(model_zoo.load_url(settings['url']))
#     model.input_space = settings['input_space']
#     model.input_size = settings['input_size']
#     model.input_range = settings['input_range']
#     model.mean = settings['mean']
#     model.std = settings['std']

def xception(num_classes=1000, pretrained='imagenet'):
    """Build an ``Xception`` model, optionally loading pretrained weights.

    Args:
        num_classes: Size of the classifier output. Must equal the value
            recorded in ``pretrained_settings`` when ``pretrained`` is set.
        pretrained: Key into ``pretrained_settings['xception']`` naming the
            weights to download, or a falsy value for random initialisation.

    Returns:
        The model, with its classifier exposed as ``last_linear`` (the
        original ``fc`` attribute is removed).

    Raises:
        KeyError: if ``pretrained`` is not a known weights name.
        AssertionError: if ``num_classes`` does not match the pretrained
            weights.
    """
    settings = None
    if pretrained:
        settings = pretrained_settings['xception'][pretrained]
        # Checked before the network is allocated so a bad argument fails
        # fast. Kept as an assert so the exception type callers see is
        # unchanged.
        assert num_classes == settings['num_classes'], \
            "num_classes should be {}, but is {}".format(settings['num_classes'], num_classes)

    # Build the network once (previously it was constructed twice when
    # pretrained weights were requested and the first instance discarded).
    model = Xception(num_classes=num_classes)

    if settings is not None:
        model.load_state_dict(model_zoo.load_url(settings['url']))

        model.input_space = settings['input_space']
        model.input_size = settings['input_size']
        model.input_range = settings['input_range']
        model.mean = settings['mean']
        model.std = settings['std']

    # TODO: ugly
    # Weights are stored under `fc`, so the rename must happen after loading.
    model.last_linear = model.fc
    del model.fc
    return model

def save_checkpoint(state, is_best, fpath='gdrive/My Drive/checkpoint.pth', best_fpath='best_model.pth'):
    """Serialize ``state`` with ``torch.save``.

    Args:
        state: Object to save (the training loop passes a dict with the
            epoch number and the model / optimizer state dicts).
        is_best: If true, a second copy is written to ``best_fpath``.
        fpath: Destination of the rolling checkpoint.
        best_fpath: Destination of the best-so-far copy.

    Missing parent directories of either destination are created first;
    without this the default ``fpath`` fails wherever ``gdrive/My Drive``
    does not already exist.
    """
    targets = [fpath]
    if is_best:
        targets.append(best_fpath)

    for target in targets:
        # torch.save also accepts file-like objects; only touch the
        # filesystem for real paths.
        if isinstance(target, (str, os.PathLike)):
            parent = os.path.dirname(target)
            if parent:
                os.makedirs(parent, exist_ok=True)
        torch.save(state, target)

class Xception_base(nn.Module):
    """Xception feature extractor with two extra separable-conv stages and a
    fresh linear classifier.

    ``base_model`` provides the 2048-channel Xception feature map (its own
    ``fc`` layer is deleted). On top of it come
    SeparableConv2d(2048 -> 3072) + BN + ReLU,
    SeparableConv2d(3072 -> 5120) + BN + ReLU, global average pooling and
    ``last_linear`` (5120 -> ``num_classes``).
    """
    def __init__(self, num_classes=1000, pretrained='imagenet'):
        """ Constructor
        Args:
            num_classes: number of outputs of the new classifier head
            pretrained: key into ``pretrained_settings['xception']`` naming
                the weights to load into the backbone, or ``None`` (or any
                other falsy value) to start from random initialisation
        """
        super().__init__()
        if pretrained:
            # The published weights include a 1000-way classifier, so the
            # backbone has to be built with that size for the state dict to
            # load; the classifier is dropped right afterwards.
            base_num_classes = 1000
            self.base_model = Xception(num_classes=base_num_classes)
            settings = pretrained_settings['xception'][pretrained]
            assert base_num_classes == settings['num_classes'], \
                "num_classes should be {}, but is {}".format(settings['num_classes'], base_num_classes)
            self.base_model.load_state_dict(model_zoo.load_url(settings['url']))
            self.base_model.input_space = settings['input_space']
            self.base_model.input_size = settings['input_size']
            self.base_model.input_range = settings['input_range']
            self.base_model.mean = settings['mean']
            self.base_model.std = settings['std']
        else:
            self.base_model = Xception(num_classes=num_classes)
        # Only the convolutional features of the backbone are used.
        del self.base_model.fc

        self.conv5 = SeparableConv2d(2048, 3072, 3, 1, 1)
        self.bn5 = nn.BatchNorm2d(3072)
        self.conv6 = SeparableConv2d(3072, 5120, 3, 1, 1)
        self.bn6 = nn.BatchNorm2d(5120)
        self.last_linear = nn.Linear(5120, num_classes)

    def forward(self, input):
        # The backbone's (in-place) ReLU module is reused for the new stages.
        relu = self.base_model.relu

        x = self.base_model.features(input)
        x = relu(x)
        x = relu(self.bn5(self.conv5(x)))
        x = relu(self.bn6(self.conv6(x)))
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(x.size(0), -1)
        x = self.last_linear(x)
        return x

In [None]:
# Size the classifier from the label encoding instead of a hard-coded 5005,
# so the model output always matches the width of the one-hot targets.
# pretrained=None: the backbone starts from random weights.
model = Xception_base(NCLASSES, pretrained=None)

In [None]:
# Move the network to the GPU before the optimizer captures its parameters.
model = model.cuda()

# Targets are one-hot vectors, so each class is scored with its own sigmoid.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.0005)
# Multiply the learning rate by 0.1 every 3 epochs.
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=3, gamma=0.1)

## Train and Predict

In [None]:
n_epochs = 13
# Lowest mean training loss seen so far; infinity so the first epoch always
# counts as an improvement (replaces the magic starting value 99).
mean_losss = float('inf')
is_best = False
for epoch in range(1, n_epochs+1):
    # The inference cell switches the model to eval mode; set train mode
    # explicitly so re-running this cell still trains with dropout/BN updates.
    model.train()
    train_loss = []

    for data, target in tqdm(train_loader, total=len(train_loader)):
        data, target = data.cuda(), target.cuda()

        optimizer.zero_grad()
        output = model(data)
        # One-hot targets arrive as float64; BCEWithLogitsLoss needs the
        # same dtype as the logits.
        loss = criterion(output, target.float())
        train_loss.append(loss.item())

        loss.backward()
        optimizer.step()

    scheduler.step()

    epoch_loss = np.mean(train_loss)
    is_best = bool(epoch_loss < mean_losss)
    if is_best:
        mean_losss = epoch_loss

    # "Best" is judged on training loss; there is no validation split here.
    save_checkpoint({'epoch': epoch,
                     'state_dict': model.state_dict(),
                     'optimizer' : optimizer.state_dict(),
                    }, is_best)
    print(f'Epoch {epoch}, train loss: {epoch_loss:.4f}')

In [None]:
model.eval()

# image file name -> space-separated top-5 predicted Ids
predictions = {}
# Inference only: skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    for data, _, names in tqdm(test_loader):
        scores = model(data.cuda()).cpu().numpy()
        for row, image_name in zip(scores, names):
            # Indices of the five highest scores, best first.
            top5 = row.argsort()[-5:][::-1]
            predictions[image_name] = ' '.join(label_encoder.inverse_transform(top5))

# Fill the submission in one vectorised pass instead of one boolean-mask
# assignment per image. Rows without a prediction keep their sample value.
sub['Id'] = sub['Image'].map(predictions).fillna(sub['Id'])

sub.to_csv('submission_xception_1.csv', index=False)