In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 
import os
from tqdm import tqdm_notebook as tqdm
import torch
import random
import torchvision
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils ,models
from PIL import Image, ImageFilter
import matplotlib.patches as patches
pd.set_option('display.width', 120)
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
import os
print(os.listdir("../input"))
# Any results you write to the current directory are saved as output.
# All paths are relative to kaggle kernel

['resnet101', 'imet-2019-fgvc6']


In [2]:
# Adapted from https://www.kaggle.com/c/imet-2019-fgvc6/discussion/87675#latest-516375
def seed_everything(seed=1234):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and configures cuDNN for deterministic
    behaviour.

    Args:
        seed (int): Seed value shared by all generators.
    """
    random.seed(seed)
    # Only affects hash randomisation of Python processes started after this
    # point (e.g. spawned workers); the running interpreter is already set up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning may select different convolution algorithms from run to run,
    # which would defeat the deterministic flag above.
    torch.backends.cudnn.benchmark = False
seed_everything()

In [3]:
# Run configuration: everything a reader might want to tune lives here.
root = '../input/imet-2019-fgvc6/'  # root directory of the competition data
SIZE = 400  # images are resized to SIZE x SIZE pixels
EPOCH = 20  # number of epochs to train
BATCH_SIZE = 128  # samples per mini-batch
PRINT_EVERY = 10000 // BATCH_SIZE  # log the loss once every this many batches (about 10,000 images)

In [4]:
# Load the annotation table (image id -> space-separated attribute ids)
# and the label table (one row per attribute class).
train_path = root + 'train/'
df_train = pd.read_csv(root + 'train.csv')
df_label = pd.read_csv(root + 'labels.csv')
print(df_train.shape, df_label.shape, sep='\n')
print(df_train.head())

(109237, 2)
(1103, 2)
                 id        attribute_ids
0  1000483014d91860          147 616 813
1  1000fe2e667721fe       51 616 734 813
2  1001614cb89646ee                  776
3  10041eb49b297c08  51 671 698 813 1092
4  100501c227f8beea  13 404 492 903 1093


In [5]:
# Random train / test / valid split.
# First draw: roughly 4% of all rows are set aside as 'valid'.
# Second draw: roughly 4% of the remaining rows become 'test'.
# Elsewhere in this notebook 'test.csv' is scored after every epoch and
# 'valid.csv' is only scored once, after threshold selection.
msk = np.random.rand(len(df_train)) < 0.96
valid = df_train[~msk]
train = df_train[msk]
msk = np.random.rand(len(train)) < 0.96
train, test = train[msk], train[~msk]

# Persist each split so the dataset class can read it back by file name.
for split_frame, split_file in ((train, "train.csv"), (test, "test.csv"), (valid, "valid.csv")):
    split_frame.to_csv(split_file, index=None)

# Sanity check: report the sizes as read back from disk.
for split_file in ('valid.csv', 'test.csv', 'train.csv'):
    print(pd.read_csv(split_file).shape)
print(pd.read_csv('valid.csv').head())

(4368, 2)
(4116, 2)
(100753, 2)
                 id            attribute_ids
0  1027bc2aeca8e1a8              13 903 1092
1  102cb701e5718d70             147 639 1035
2  1048e4b13229f27d                  438 369
3  105b6c58db321045                 121 1035
4  105f044288493432  348 418 542 579 624 796


In [6]:
def to_onehot(tags, num_classes=None):
    '''Convert a string of space-separated class ids to a multi-hot tensor.

    Args:
        tags: String such as "13 404 903". ``None`` or NaN (what pandas
            yields for an empty CSV cell) is treated as "no labels"; any
            other value is converted with ``str()`` before splitting.
        num_classes (int, optional): Length of the returned vector.
            Defaults to the number of rows in the global ``df_label``.

    Returns:
        torch.Tensor: 1-D float tensor of length ``num_classes`` holding 1
        at every listed class id and 0 elsewhere.
    '''
    if num_classes is None:
        num_classes = df_label.shape[0]
    # NaN is the only float that is not equal to itself.
    if tags is None or (isinstance(tags, float) and tags != tags):
        tags_list = []
    else:
        tags_list = str(tags).split()
    ten = torch.zeros(num_classes)
    for tag in tags_list:
        ten[int(tag)] = 1
    return ten

In [7]:
class givendataset(Dataset):
    """Image dataset backed by a CSV with ``id`` and ``attribute_ids`` columns.

    Each item is a dict with:
        'image': normalised float tensor built from ``<root_dir>/<id>.png``
        'tags':  multi-hot label tensor produced by ``to_onehot``
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a PIL image.
        """
        self.frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of annotated rows."""
        return len(self.frame)

    def __getitem__(self, idx):
        """Load, transform and normalise the image at position ``idx``."""
        # Positional lookup: independent of whatever index labels the frame has.
        img_name = os.path.join(self.root_dir, self.frame['id'].iloc[idx])
        # Force three channels so grayscale / palette / RGBA files still match
        # the 3-channel normalisation below; the context manager closes the
        # underlying file as soon as the pixel data has been copied.
        with Image.open(img_name + '.png') as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        image = torchvision.transforms.ToTensor()(image)
        # Channel statistics conventionally used with ImageNet-pretrained models.
        image = transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])(image)
        tags = to_onehot(self.frame['attribute_ids'].iloc[idx])
        sample = {'image': image, 'tags': tags}
        return sample

In [8]:
# PIL-level preprocessing. Both pipelines resize to SIZE x SIZE
# (interpolation code 2 is PIL's bilinear filter); only the training
# pipeline adds a random horizontal flip as augmentation.
train_trans = transforms.Compose([
    transforms.Resize((SIZE, SIZE), interpolation=2),
    transforms.RandomHorizontalFlip(p=0.5),
])
test_trans = transforms.Compose([
    transforms.Resize((SIZE, SIZE), interpolation=2),
])

# Both splits were carved out of the labelled training images, so they share
# the same image directory.
train_dataset = givendataset(csv_file='train.csv', root_dir=root + 'train', transform=train_trans)
test_dataset = givendataset(csv_file='test.csv', root_dir=root + 'train', transform=test_trans)

In [9]:
# Sanity check: shapes of one training sample (image tensor, label vector).
print(train_dataset[10]['image'].shape, train_dataset[10]['tags'].shape, sep='\n')

torch.Size([3, 400, 400])
torch.Size([1103])


In [10]:
# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
# Evaluation only accumulates a summed loss, so shuffling adds nothing; keep
# the order fixed so evaluation is repeatable and draws no random numbers.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

In [11]:
# ResNet-101 backbone initialised from a local copy of the ImageNet weights.
model = models.resnet101(pretrained=False)
model.load_state_dict(torch.load('../input/resnet101/resnet101.pth')) #loading imagenet weights

# Replace the classifier with a two-layer head emitting one logit per label.
model.fc = torch.nn.Sequential(
    torch.nn.Linear(model.fc.in_features, 2048, bias=True),
    torch.nn.ReLU(),
    torch.nn.Dropout(p=0.5),
    torch.nn.Linear(2048, len(df_label), bias=True),
)
print(model.fc)

# Freeze the whole network, then re-enable gradients for the new head and the
# last residual stage only.
for param in model.parameters():
    param.requires_grad = False
for trainable_part in (model.fc, model.layer4):
    for param in trainable_part.parameters():
        param.requires_grad = True

Sequential(
  (0): Linear(in_features=2048, out_features=2048, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.5)
  (3): Linear(in_features=2048, out_features=1103, bias=True)
)


In [13]:
def getpred(path,root,threshold=0.2):
    '''Predict attribute ids for every image listed in a CSV file.

    Args:
        path: Path to a CSV file with ``id`` and ``attribute_ids`` columns.
        root: Folder containing the images named in the ``id`` column.
        threshold: A class is predicted when its sigmoid probability is
            strictly greater than this value.

    Returns:
        The DataFrame read from ``path`` with ``attribute_ids`` replaced by
        the predicted class ids as a space-separated string (an empty string
        when no class clears the threshold).

    Uses the global ``model``, ``test_trans`` and ``train_on_gpu``. The model
    is switched to eval mode for inference and put back in train mode before
    returning.
    '''
    df_test = pd.read_csv(path)
    batch = 100
    test_dataset = givendataset(path,root,test_trans)
    # shuffle=False keeps predictions aligned with the rows of df_test.
    test_loader  = torch.utils.data.DataLoader(test_dataset,batch_size=batch,shuffle=False)
    predictions = []
    model.eval()
    with torch.no_grad():
        for sample in test_loader:
            data = sample['image'].type(torch.FloatTensor)
            if train_on_gpu: # move tensors to GPU if CUDA is available
                data = data.cuda()
            out = torch.sigmoid(model(data).cpu()).numpy()
            for probs in out:
                class_ids = np.flatnonzero(probs > threshold).tolist()
                predictions.append(" ".join(str(class_id) for class_id in class_ids))
    model.train()
    # Assign the whole column at once: element-wise chained assignment
    # (df[col][k] = v) is not guaranteed to write through to the frame.
    df_test['attribute_ids'] = predictions
    return df_test

In [14]:
def getF2(df1,df2,epsilon=0.0000001):
    '''Mean per-sample F2 score between two label DataFrames.

    Args:
        df1: Ground-truth frame; ``attribute_ids`` holds space-separated
            class ids.
        df2: Predicted frame with the same shape and row order as ``df1``.
        epsilon: Small constant added to denominators to avoid division by
            zero when a row has no true or no predicted labels.

    Returns:
        0-dim torch tensor with the F-beta score (beta = 2) averaged over
        rows.

    Raises:
        AssertionError: If the two frames differ in shape.
        ZeroDivisionError: If the frames have no rows.
    '''
    assert df1.shape == df2.shape
    beta = 2
    total = 0.0
    # Pair rows by position rather than by index label, so frames whose index
    # is not 0..n-1 (e.g. filtered frames) are scored correctly.
    true_tags = df1['attribute_ids'].tolist()
    pred_tags = df2['attribute_ids'].tolist()
    for true_str, pred_str in zip(true_tags, pred_tags):
        y1 = to_onehot(true_str)
        y2 = to_onehot(pred_str)
        true_positives = (y1 * y2).sum()
        predicted_positives = y2.sum()
        possible_positives = y1.sum()
        precision = true_positives / (predicted_positives + epsilon)
        recall = true_positives / (possible_positives + epsilon)
        total += (((1+beta**2)*precision*recall) / ((beta**2)*precision+recall+epsilon))
    return total/(df1.shape[0])

In [15]:
# Use the GPU when one is available; everything below honours this flag.
train_on_gpu = torch.cuda.is_available()
if train_on_gpu:
    model.cuda()
# Weight positive targets 4x in the loss, one weight per class.
pos_wei = torch.full((df_label.shape[0],), 4.0)
criterion = torch.nn.BCEWithLogitsLoss(reduction='mean',pos_weight=pos_wei)
if train_on_gpu:
    # Only move the loss (and its pos_weight buffer) to the GPU when there is
    # one, so the notebook also runs on CPU-only machines.
    criterion = criterion.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.003)
# Halve the learning rate every 4 epochs.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.5)

In [16]:
# Train for EPOCH epochs. After every epoch the model is scored on the
# 'test.csv' split (loss and F2 at threshold 0.2) and two checkpoints are
# maintained: 'model_cifar.pt' (lowest loss) and 'model_cifar_.pt' (highest F2).
n_epochs = EPOCH # number of epochs to train the model
test_loss_min = np.inf # track change in validation loss
F2_max    = 0.0
for epoch in range(1, n_epochs+1):
    train_loss = 0.0 # keep track of training and validation loss
    test_loss = 0.0
    ###################
    # train the model #
    ###################
    model.train()
    for p, sample in enumerate(train_loader,1):
        data = sample['image'].type(torch.FloatTensor)
        target = sample['tags'].type(torch.FloatTensor)
        if train_on_gpu: # move tensors to GPU if CUDA is available
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad() # clear the gradients of all optimized variables
        output = model(data) # forward pass: compute predicted outputs by passing inputs to the model
        loss = criterion(output, target) # calculate the batch loss
        loss.backward() # compute gradient of the loss with respect to model parameters
        optimizer.step() # perform a single optimization step (parameter update)
        # loss is a batch mean; weight by batch size so the epoch average is per sample
        train_loss += loss.item()*data.size(0)

        if p % PRINT_EVERY == 1: #print loss every few batches
            print("after {:6} images loss is {:.5f}".format(p*BATCH_SIZE,loss.item()))

    del data,target #free some GPU mem
    ######################
    # validate the model #
    ######################
    model.eval()
    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every batch and lowers memory use.
    with torch.no_grad():
        for sample in test_loader:
            data = sample['image'].type(torch.FloatTensor)
            target = sample['tags'].type(torch.FloatTensor)
            if train_on_gpu: # move tensors to GPU if CUDA is available
                data, target = data.cuda(), target.cuda()
            output = model(data) # forward pass: compute predicted outputs by passing inputs to the model
            loss = criterion(output, target) # calculate the batch loss
            test_loss += loss.item()*data.size(0) # update average validation loss

    #get some idea of F2 score threshold is set to 0.2
    df_test_pred = getpred('test.csv',root + 'train/',0.2)
    df_test = pd.read_csv('test.csv')
    F2 = getF2(df_test,df_test_pred)
    print("F2 score is {:.5f}".format(F2))

    # calculate average losses
    train_loss = train_loss/len(train_loader.dataset)
    test_loss = test_loss/len(test_loader.dataset)
    scheduler.step()

    # print training/validation statistics
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch, train_loss, test_loss))

    # save model if validation loss has decreased
    if test_loss <= test_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        test_loss_min,
        test_loss))
        torch.save(model.state_dict(), 'model_cifar.pt')
        test_loss_min = test_loss
    # save a separate checkpoint whenever the F2 score matches or beats the best so far
    if F2 >= F2_max:
        print('F2 score increased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        F2_max,F2))
        torch.save(model.state_dict(), 'model_cifar_.pt')
        F2_max = F2

after    128 images loss is 0.70170
after  10112 images loss is 0.04458
after  20096 images loss is 0.03635
after  30080 images loss is 0.03259
after  40064 images loss is 0.03393
after  50048 images loss is 0.03032
after  60032 images loss is 0.03156
after  70016 images loss is 0.02976
after  80000 images loss is 0.02729
after  89984 images loss is 0.03001
after  99968 images loss is 0.02726
F2 score is 0.43066
Epoch: 1 	Training Loss: 0.035227 	Validation Loss: 0.028107
Validation loss decreased (inf --> 0.028107).  Saving model ...
F2 score increased (0.000000 --> 0.430664).  Saving model ...
after    128 images loss is 0.02569
after  10112 images loss is 0.02805
after  20096 images loss is 0.03102
after  30080 images loss is 0.02694
after  40064 images loss is 0.02717
after  50048 images loss is 0.02640
after  60032 images loss is 0.02990
after  70016 images loss is 0.02790
after  80000 images loss is 0.02830
after  89984 images loss is 0.02677
after  99968 images loss is 0.02685
F

In [17]:
def findThre(start, end, step):
    '''Sweep decision thresholds and return the best-scoring one.

    Every value of ``np.arange(start, end, step)`` is used to predict labels
    for 'test.csv'; the threshold with the highest F2 score is returned
    (the first one in case of a tie). Each score is printed as it is computed.
    '''
    thresholds = np.arange(start, end, step)
    df_valid = pd.read_csv('test.csv')
    scores = []
    for thr in thresholds:
        df_test = getpred('test.csv',root + 'train/',thr)
        f2 = getF2(df_valid,df_test).item()
        print("for threshold {:.3f}     score is {:.6f}".format(thr,f2))
        scores.append(f2)
    best_position = scores.index(max(scores))
    return thresholds[best_position]

In [18]:
%%time
# Restore the checkpoint that achieved the best F2 score during training.
model.load_state_dict(torch.load('model_cifar_.pt'))

# Coarse sweep first, then a finer sweep centred on the coarse winner.
th = findThre(0.1, 0.61, 0.1) #find rough estimate
print("Rough estimate is {:.6f}".format(th))
th = findThre(th - 0.05, th + 0.055, 0.033) # final estimate
print("Final estimate is {:.6f}".format(th))

# Score the chosen threshold on 'valid.csv', which was used neither for
# training nor for picking the threshold.
df_test = pd.read_csv('valid.csv')
df_test_pred = getpred('valid.csv', root + 'train/', th)
for shown_frame in (df_test_pred, df_test):
    print(shown_frame.head())
F2 = getF2(df_test, df_test_pred)
print("Final F2 score is {:.5f}".format(F2))

for threshold 0.100     score is 0.546024
for threshold 0.200     score is 0.569201
for threshold 0.300     score is 0.560389
for threshold 0.400     score is 0.543734
for threshold 0.500     score is 0.521868
for threshold 0.600     score is 0.492648
Rough estimate is 0.200000
for threshold 0.150     score is 0.562719
for threshold 0.183     score is 0.567865
for threshold 0.216     score is 0.568553
for threshold 0.249     score is 0.566841
Final estimate is 0.216000
                 id                                      attribute_ids
0  1027bc2aeca8e1a8                                13 671 698 813 1092
1  102cb701e5718d70                                       147 189 1034
2  1048e4b13229f27d                                            369 438
3  105b6c58db321045                                       121 369 1035
4  105f044288493432  147 189 259 418 541 542 543 579 581 753 813 94...
                 id            attribute_ids
0  1027bc2aeca8e1a8              13 903 1092
1  102cb70

In [21]:
# Generate the submission: predict labels for the competition test images
# listed in sample_submission.csv, using the threshold `th` selected earlier,
# then write the result to submission.csv in the working directory.
df_test = getpred(root + 'sample_submission.csv',root + 'test/',th)
df_test.to_csv('submission.csv', index=False) 
print(df_test.head())

                 id                                      attribute_ids
0  10023b2cc4ed5f68                           223 289 369 587 766 1059
1  100fbe75ed8fd887                                        93 231 1039
2  101b627524a04f19                                         79 420 784
3  10234480c41284c6  147 480 483 501 522 725 737 738 776 830 1046 1068
4  1023b0e2636dcea8  147 283 322 492 538 584 616 698 813 954 1046 1...
