In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from fastai.vision import *
from fastai.metrics import accuracy
from fastai.basic_data import *
from skimage.util import montage

In [3]:
import pandas as pd
from torch import optim
import re
import albumentations

In [4]:
from utils import *

In [5]:
import fastai
from fastprogress import force_console_behavior
import fastprogress
fastprogress.fastprogress.NO_BAR = True
master_bar, progress_bar = force_console_behavior()
fastai.basic_train.master_bar, fastai.basic_train.progress_bar = master_bar, progress_bar

# Set up augmentation

In [6]:
aug = albumentations.Compose([
    albumentations.RandomBrightnessContrast(p=0.75),
    albumentations.ShiftScaleRotate(shift_limit=0, scale_limit=0.1, rotate_limit=10, interpolation=2, p=0.75)
])

composed_augs = albumentations.Compose([
    albumentations.RandomBrightnessContrast(p=0.75),
    albumentations.ShiftScaleRotate(shift_limit=0, scale_limit=0.1, rotate_limit=10, interpolation=2, p=0.75)
])



In [7]:
def augment(ary):
    return composed_augs(image=ary)['image']

# Some utils functions

In [8]:
def paths_to_files_in(dir):
    paths = []
    for path in Path(dir).iterdir():
        if path.is_dir():
            paths += paths_to_files_in(path)
        else:
            paths.append(path)
    return paths

def open_image(path): return PIL.Image.open(path).convert('RGB')

def image2ary(image): return np.asarray(image)

def ary2tensor(ary, dtype=np.float32): return torch.from_numpy(ary.astype(dtype, copy=False))

def image2tensor(image, augment_fn=None):
    ary = image2ary(image)
    if augment_fn: ary = augment_fn(ary)
    ary = ary.transpose(2, 0, 1)
    tensor = ary2tensor(ary)
    return tensor.div_(255)

imagenet_stats = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
mean, std = torch.from_numpy(np.array(imagenet_stats).astype(np.float32))

def imagenet_normalize(tensor):
    zero_centered = tensor - mean[:, None, None]
    return zero_centered / std[:, None, None]

def imagenet_denormalize(zero_centered):
    zero_centered = zero_centered * std[:, None, None]
    return zero_centered + mean[:, None, None]




# Dataset

In [9]:
class Dataset():
    def __init__(self, items, reader, labeler):
        self.items, self.reader, self.labeler = items, reader, labeler

    def __len__(self):
        return len(self.items)

    def __getitem__(self, idx):
        item = self.items[idx]
        return self.reader(item), self.labeler(item)


In [10]:
class Reader():
    def __init__(self, path, augment_fn=None):
        self.path = path
        self.augment_fn = augment_fn
    def __call__(self, fns):
        paths = [f'{self.path}/{filename}' for filename in fns]
        images = [open_image(image_path) for image_path in paths]
        tensors = [image2tensor(image, augment_fn = self.augment_fn) for image in images]
        return [imagenet_normalize(tensor) for tensor in tensors]

In [11]:
pd.read_csv('data/train.csv').Id.nunique()

5005

In [12]:
class Labeler():
    def __init__(self):
        df = pd.read_csv('data/train.csv')
        self.fn2label = {}
        for row in df[df.Id != 'new_whale'].itertuples():
            self.fn2label[row.Image] = row.Id
        self.classes = sorted(list(set(list(self.fn2label.values()))))
    def __call__(self, fns):
        labels = [self.fn2label[fn] for fn in fns]
        # if 2 imgs have same label, return [label1_idx,label1_idx,0]. Else return [label1_idx,label2_idx,1]
        return [self.classes.index(label) for label in labels] + [1 if labels[0] != labels[1] else 0]

In [13]:
def create_basic_dataloader(sz, batch_size, num_workers=12):
    reader = Reader(f'data/train-extracted-{sz}')
    basic_ds = Dataset([*zip(df.Image.tolist(), df.Image.tolist())], reader, labeler)
    return DataLoader(basic_ds, batch_size=batch_size, num_workers=num_workers)

In [15]:
SZ = 224
NUM_WORKERS = 12
BS = 32

In [None]:
# When I refer to 'whale', I mean a particular image (the file name).

df = pd.read_csv('data/train.csv')
df = df[df.Id != 'new_whale']
images_without_meaningful_bbox_predictions = \
    ['85a95e7a8.jpg', 'b370e1339.jpg', 'b4cb30afd.jpg', 'd4cb9d6e4.jpg', '6a72d84ca.jpg']
df = df[~df.Image.isin(images_without_meaningful_bbox_predictions)]

In [16]:
labeler = Labeler()

df.head() # no new whale and imgs with nonsense bbox prediction

Unnamed: 0,Image,Id
0,0000e88ab.jpg,w_f48451c
1,0001f9222.jpg,w_c3d896a
2,00029d126.jpg,w_20df2c5
6,000a6daec.jpg,w_dd88965
8,0016b897a.jpg,w_64404ac


In [17]:
basic_dataloader = create_basic_dataloader(SZ, BS, NUM_WORKERS)

In [18]:
x,y = next(iter(basic_dataloader))

In [38]:
len(x),x[0].shape,x[1].shape

(2, torch.Size([32, 3, 224, 224]), torch.Size([32, 3, 224, 224]))

In [20]:
len(y),y[0][:5],y[1][:5],y[2][:5]

(3,
 tensor([4785, 3807,  661, 4314, 1928]),
 tensor([4785, 3807,  661, 4314, 1928]),
 tensor([0, 0, 0, 0, 0]))

In [21]:
y[2] # ???

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])

# Model

In [24]:
class CustomModel(nn.Module):  # Siamese + classification
    def __init__(self):
        super().__init__()
        self.cnn = nn.Sequential(*list(models.resnet50(True).children())[:-2]) #out: (bn,?,? 2048)
        self.head = create_head(4096 # adaptive concat pool (2048*2)
                                , 5004, [2048]) # out: 5004 (# of classes ignoring new whale)
        self.ada_concat = AdaptiveConcatPool2d(1)

    def forward(self, ims_a, ims_b):
        cnn_out_a = self.cnn(ims_a) # (bs,?,?,2048), will also be returned after concat pool (for siamese). Output shape (bs,2048*2)
        out_a = self.head(cnn_out_a) # (bs,5004), will be returned for classification
        
        cnn_out_b = self.cnn(ims_b) # (bs,?,?,2048)
        out_b = self.head(cnn_out_b) # (bs,5004)
    
        return out_a, out_b, self.ada_concat(cnn_out_a).squeeze(), self.ada_concat(cnn_out_b).squeeze()

In [25]:
temp = list(models.resnet50(True).children())

In [26]:
temp1 = create_head(4096, 5004, [2048])
temp1

Sequential(
  (0): AdaptiveConcatPool2d(
    (ap): AdaptiveAvgPool2d(output_size=1)
    (mp): AdaptiveMaxPool2d(output_size=1)
  )
  (1): Flatten()
  (2): BatchNorm1d(4096, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): Dropout(p=0.25)
  (4): Linear(in_features=4096, out_features=2048, bias=True)
  (5): ReLU(inplace)
  (6): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (7): Dropout(p=0.5)
  (8): Linear(in_features=2048, out_features=5004, bias=True)
)

In [27]:
temp[-3:]

[Sequential(
   (0): Bottleneck(
     (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
     (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
     (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
     (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (relu): ReLU(inplace)
     (downsample): Sequential(
       (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
       (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     )
   )
   (1): Bottleneck(
     (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
     (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 

# Loss and metric

In [28]:
class ContrastiveLoss(torch.nn.Module):
    """
    Contrastive loss function.
    Based on: http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    
    loss = mean( (1-Y)*0.5* e_dist**2 + Y*0.5* max(0,m-e_dist)**2 )
    
    e_dist: euclidean distance b/t 2 outputs 
    Y     : 0 if same class, else 1
    m     : upper limit for dissimilar pairs (>0). Dissimilar pairs beyond this margin will not contribute to the loss
    """

    def __init__(self, margin=2.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        euclidean_distance = F.pairwise_distance(output1, output2)
        loss_contrastive = torch.mean((1-label) * torch.pow(euclidean_distance, 2) +
                                      (label) * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2))
        return loss_contrastive

In [29]:

MARGIN = 60

def cross_entropy_loss(preds, labels_a, labels_b, diff_class_ind):
    return F.cross_entropy(preds[0], labels_a) + F.cross_entropy(preds[1], labels_b)

def contr_loss(preds, labels_a, labels_b, diff_class_ind):
    c_loss = ContrastiveLoss(MARGIN)
    return c_loss(preds[2], preds[3], diff_class_ind.float())

def loss_fn(preds, labels_a, labels_b, diff_class_ind):
    # combine C.E and Contrastive
    # 10*CE_loss + 1/25 contr_loss
    return 10 * cross_entropy_loss(preds, labels_a, labels_b, diff_class_ind) + contr_loss(preds, labels_a, labels_b, diff_class_ind) / 25

In [32]:
def accuracy_mod(preds, labels_a, labels_b, diff_class_ind):
    return 0.5 * accuracy(preds[0], labels_a) + 0.5 * accuracy(preds[1], labels_b)

def map5_mod(preds, labels_a, labels_b, diff_class_ind):
    return 0.5 * map5(preds[0], labels_a) + 0.5 * map5(preds[1], labels_b)

# Start loading data using model

In [30]:
def create_similarity_dict(model, dataloader):
    # Calculating descriptors for each image
    descs = []
    model.eval()
    with torch.no_grad():
        for batch in dataloader:
            ims = batch[0][0].cuda() # get batch of 1st img (bs,3,224,224)
            cnn_out = learn.model.cnn(ims) # (bn,?,? 2048)
            descs.append(learn.model.ada_concat(cnn_out).squeeze().detach().cpu()) # (bs,2048*2)

    descs = torch.cat(descs).cuda() # (# of imgs, 2048*2)

    # Calculating similarity dict for each image
    dists = {}
    # { img0_name: [dist(img0,img0),dist(img0,img1) ... ] 
    #   img1_name: [dist(img1,img0),dist(img1,img1) ] 
    #}
    for i, (whale, _) in enumerate(dataloader.items): 
        dists[whale] = torch.pairwise_distance(descs[i], descs).cpu().numpy()    
    return dists

In [40]:
# basic_dataloader.items[0]

('0000e88ab.jpg', '0000e88ab.jpg')

In [None]:
# TODO: create a fake data just to get learn.model? There has to be a better way

def create_fake_data(): # needed for loading the model
    fake_ds = Dataset([],_,_)
    fake_dl = DataLoader(fake_ds)

    data = DataBunch(fake_dl, fake_dl)
    data.train_ds.loss_func = lambda: None 
    
    return data

In [34]:
learn = Learner(create_fake_data(), CustomModel(), loss_func=loss_fn, metrics=[accuracy_mod, map5_mod, cross_entropy_loss, contr_loss])
learn = learn.clip_grad()
learn.split((learn.model.cnn[6], learn.model.head));

In [35]:
# from untrain model!
dists = create_similarity_dict(learn.model, basic_dataloader)


In [43]:
dists['0000e88ab.jpg'].shape,dists['0000e88ab.jpg'][:5]

((15694,),
 array([6.400001e-05, 5.041779e+01, 5.549538e+01, 6.339818e+01, 5.291072e+01], dtype=float32))

In [46]:
def create_data(sz, dist_dict, batch_size, k=20, num_workers=12, train_on_both_train_and_val=False):
    reader_aug = Reader(f'data/train-extracted-{sz}', augment_fn=augment)
    reader = Reader(f'data/train-extracted-{sz}')
    
    val_fns = list(pd.read_pickle('data/val_fname_no_nw.pkl'))
    val_fns_set = set(val_fns)

    trn_df = df[~df.Image.isin(val_fns)]
    val_df = df[df.Image.isin(val_fns)]
    
    # TODO: this is poorly written
    ds_on_which_dists_were_calculated = Dataset([*zip(df.Image.tolist(), df.Image.tolist())], reader, labeler)
    
    uniq_whales = df.Id.unique().tolist() if train_on_both_train_and_val else trn_df.Id.unique().tolist()

    def sample_other_whale():
        # sample 1 img from k 'toughest match' from 'this_whale'
        candidate_whales = dist_dict[this_whale].argsort() 
        this_whale_class = labeler.fn2label[this_whale]
        candidate_fns = []
        for i in range(200):
            # 1st img_fn from ds_on_which_dists_were_calculated (with idx, going from lowest dist to 'this_whale' to 200th dist) 
            candidate_whale = ds_on_which_dists_were_calculated.items[candidate_whales[i]][0] # 
            
            # add to 'candidate whale' fns list if the candidate_fname not in val fns and have different label than 'this_whale'
            if (candidate_whale not in val_fns_set) and (labeler.fn2label[candidate_whale] != this_whale_class): 
                candidate_fns.append(candidate_whale)
                
            # we only need k values of candidate whale fns
            if len(candidate_fns) == k: break 
        np.random.shuffle(candidate_fns) # randomly pick one from K toughest matches. TODO: toughest? argsort return closest dist first
        return candidate_fns[0]

    def sample_this_whale(): 
        # sample one img from given IDs
        return this_whale_df.sample(n=1).iloc[0].Image

    
    
    train_items = []
    for whale in uniq_whales: # loop through unique IDs
        this_whale_df = trn_df[trn_df.Id == whale]
        other_whale_df = trn_df[trn_df.Id != whale]

        this_whale = sample_this_whale()

        # sampling same whale if possible
        if this_whale_df.shape[0] == 1: 
            # only a single picture of this whale in dataset -> sample other whale
            other_whale = sample_other_whale()
            train_items.append([this_whale, other_whale])
        else:
            # get the img of whale with same id, but not itself
            same_whale = this_whale_df[this_whale_df.Image != this_whale].sample(n=1).iloc[0].Image
            train_items.append([this_whale, same_whale])

        # sampling different whales
        this_whale = sample_this_whale()
        train_items.append([this_whale, sample_other_whale()])
    
    if train_on_both_train_and_val:
        valid_items = list(zip(val_df.Image.values[:batch_size].tolist(), val_df.Image.values[BS:2*batch_size].tolist()))
    else:
        valid_items = list(zip(val_df.Image.values[:1465].tolist(), val_df.Image.values[1465:2930].tolist())) #???

    train_ds = Dataset(train_items, reader_aug, labeler)
    valid_ds = Dataset(valid_items, reader, labeler)

    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    valid_dl = DataLoader(valid_ds, batch_size=batch_size, num_workers=num_workers)

    data = DataBunch(train_dl, valid_dl)
    data.train_ds.loss_func = lambda: None
    
    return data

In [47]:
data = create_data(SZ, dists, BS)

In [59]:
x,y = next(iter(data.train_dl))

In [60]:
len(x),len(y)

(2, 3)

In [61]:
y[-1]

tensor([1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 0, 0, 1, 0, 1], device='cuda:0')

In [62]:
%%time

learn = Learner(data, CustomModel(), loss_func=loss_fn, metrics=[accuracy_mod, map5_mod, cross_entropy_loss, contr_loss])
learn = learn.clip_grad()
learn.split((learn.model.cnn[6], learn.model.head))
learn.freeze()

learn.fit_one_cycle(6, 1e-2)
learn.save(name())

epoch     train_loss  valid_loss  accuracy_mod  map5_mod  cross_entropy_loss  contr_loss  time    
0         162.743042  173.225174  0.021502      0.037344  17.087849           58.668659   02:10     
1         121.573799  141.476654  0.083618      0.112452  14.079737           16.981930   02:10     
2         61.496269   113.136841  0.202048      0.255848  11.201220           28.115734   02:10     
3         29.561525   92.655258   0.361775      0.420563  9.182215            20.828007   02:10     
4         16.557791   82.454742   0.423549      0.483174  8.152318            23.288803   02:10     
5         13.008657   79.646957   0.447099      0.502389  7.865516            24.795168   02:10     


NameError: name 'name' is not defined

Get submission:

https://github.com/radekosmulski/whale/blob/master/classification_and_metric_learning.ipynb