In [1]:
import os
import math
import torchvision 
import torch.nn as nn
import torch
import cv2
from tqdm.notebook import tqdm
import numpy as np # linear algebra
import pandas as pd
import gc

In [2]:
# Load the training annotations table. pandas is already imported as `pd`
# in the notebook's first cell, so it is not imported again here.
train_df = pd.read_csv('data/imaterialist-fashion-2020-fgvc7/train.csv')

In [3]:
# Build the ascending list of every ClassId that appears on at least one row
# whose AttributesIds value is present (not NaN).
attr = train_df.AttributesIds
clss = train_df.ClassId
not_exist = attr.isna()

# Boolean-mask selection followed by unique() yields the same set of class ids
# as visiting the rows one by one, without depending on the frame having a
# default 0..n-1 index and without a list-membership scan per row.
non_nan_classes = sorted(clss[~not_exist].unique())

In [7]:
# Keep only the annotation rows whose ClassId is one of `non_nan_classes`.
filtered_df = pd.DataFrame(
    train_df.loc[train_df['ClassId'].isin(non_nan_classes)]
)

In [8]:
class CONFIG:
    """Settings container passed to the dataset."""

    # Length of the multi-hot label vector built by the dataset.
    LABELS_LENGTH = 294

In [9]:
# Draft of AttrModel wrapped in a string literal, so none of it is executed;
# the cell merely evaluates to (and displays) this string. The AttrModel class
# that is actually used is defined in a later cell.
# NOTE(review): this draft feeds the pooled feature map straight into `fc`
# with no flatten step in between - confirm before ever reviving it.
'''class AttrModel(nn.Module):
    def __init__(self, num_classes=294):
        super(AttrModel, self).__init__()
        self.num_classes = num_classes
        Base_Model = torchvision.models.resnet18(pretrained=True)
        
        children = list(Base_Model.children())
        
        self.conv = children[0]
        self.batch_norm = children[1]
        self.relu = children[2]
        self.maxpool = children[3]
        self.blocks1 = children[4]
        self.blocks2 = children[5]
        self.blocks3 = children[6]
        self.blocks4 = children[7]
        self.adp_avg_pool = children[8]
        self.fc = nn.Linear(children[9].in_features, self.num_classes)
        
        
    def forward(self, x):
        x = self.conv(x)
        x = self.batch_norm(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.blocks1(x)
        x = self.blocks2(x)
        x = self.blocks3(x)
        x = self.blocks4(x)
        x = self.adp_avg_pool(x)
        x = self.fc(x)
        x = torch.nn.functional.sigmoid(x)
        return x'''

'class AttrModel(nn.Module):\n    def __init__(self, num_classes=294):\n        super(AttrModel, self).__init__()\n        self.num_classes = num_classes\n        Base_Model = torchvision.models.resnet18(pretrained=True)\n        \n        children = list(Base_Model.children())\n        \n        self.conv = children[0]\n        self.batch_norm = children[1]\n        self.relu = children[2]\n        self.maxpool = children[3]\n        self.blocks1 = children[4]\n        self.blocks2 = children[5]\n        self.blocks3 = children[6]\n        self.blocks4 = children[7]\n        self.adp_avg_pool = children[8]\n        self.fc = nn.Linear(children[9].in_features, self.num_classes)\n        \n        \n    def forward(self, x):\n        x = self.conv(x)\n        x = self.batch_norm(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n        x = self.blocks1(x)\n        x = self.blocks2(x)\n        x = self.blocks3(x)\n        x = self.blocks4(x)\n        x = self.adp_avg_pool(x)\n  

In [10]:
class Attr_Dataset(torch.utils.data.Dataset):
    """Map-style dataset yielding ``(masked image, multi-hot label vector)``.

    Every row of ``df`` supplies an image id, a run-length-encoded mask with
    its height and width, and a comma-separated attribute-id string (or NaN).

    Args:
        config: object exposing ``LABELS_LENGTH``, the label-vector length.
        df: DataFrame with columns ``ImageId``, ``EncodedPixels``, ``Height``,
            ``Width`` and ``AttributesIds``.
    """

    # Folder the JPEGs are read from, and the (width, height) they are resized to.
    IMG_DIR = 'data/imaterialist-fashion-2020-fgvc7/train/'
    IMG_SIZE = (1024, 1024)
    # Raw attribute ids greater than LABEL_GAP_START are shifted down by
    # LABEL_GAP_SHIFT before being used as label-vector indices.
    # NOTE(review): presumably this closes a gap in the raw id numbering so
    # the ids fit into LABELS_LENGTH slots - confirm against the dataset spec.
    LABEL_GAP_START = 234
    LABEL_GAP_SHIFT = 47

    def __init__(self, config,  df):
        self.config = config
        self.df = df

        # Image ids come from `df` itself (not from a module-level frame) so
        # that they stay aligned row-for-row with the lists below.
        self.IMGS = list(df.ImageId)
        self.Enc = list(df.EncodedPixels)
        self.H = list(df.Height)
        self.W = list(df.Width)
        self.Attr = list(df.AttributesIds)

    def vectorize_labels(self, labels):
        """Return an integer vector of length ``LABELS_LENGTH`` with ones at ``labels``.

        Raises:
            IndexError: if an index lies outside the vector.
        """
        vector = np.zeros(self.config.LABELS_LENGTH, dtype=int)
        # The explicit integer dtype keeps an empty `labels` list a valid index.
        vector[np.asarray(labels, dtype=np.intp)] = 1
        return vector

    def change_label_indices(self, labels):
        """Return a new list with every id above the gap start shifted down.

        The input list is left unmodified.
        """
        return [
            label - self.LABEL_GAP_SHIFT if label > self.LABEL_GAP_START else label
            for label in labels
        ]

    def make_single_mask(self, encoded_string, height, width):
        """Decode a run-length string into a ``(height, width)`` uint8 mask.

        ``encoded_string`` holds whitespace-separated ``start length`` pairs;
        starts are 1-based positions in the column-major flattened image.
        """
        runs = np.array(list(map(int, encoded_string.split()))).reshape(-1, 2)
        flat = np.zeros((height*width), dtype=np.uint8)
        for start, run_length in runs:
            begin = start - 1  # 1-based -> 0-based
            flat[begin:begin + run_length] = 1
        # order='F' because the run positions count down columns first.
        return flat.reshape((height, width), order='F')

    def __getitem__(self, idx):
        """Return ``(image, label_vector)`` for row ``idx``.

        The image is RGB float32 scaled by 1/255, zeroed outside the row's
        mask, resized to ``IMG_SIZE`` and returned channels-first.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        path = self.IMG_DIR + self.IMGS[idx] + '.jpg'
        bgr = cv2.imread(path)
        if bgr is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise FileNotFoundError('missing or unreadable image: ' + path)

        img = np.array(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)/255, dtype=np.float32)
        mask = self.make_single_mask(self.Enc[idx], self.H[idx], self.W[idx])
        # Broadcasting the mask over the channel axis blanks all three channels.
        img = img * mask[..., np.newaxis]
        img = cv2.resize(img, self.IMG_SIZE, interpolation=cv2.INTER_AREA)

        labels = self.Attr[idx]
        if labels != labels:
            # Only NaN is unequal to itself: this row carries no attributes.
            labels = []
        else:
            labels = self.change_label_indices(list(map(int, labels.split(','))))
        label_vector = self.vectorize_labels(labels)

        return np.moveaxis(img, -1, 0), label_vector

    def __len__(self):
        """Number of rows (image segments) in the dataset."""
        return len(self.IMGS)

def collate_fn(batch):
    """Regroup a sequence of per-sample tuples into one tuple per field."""
    per_field = zip(*batch)
    return tuple(per_field)

In [11]:
class AttrModel(nn.Module):
    """ResNet-18 with its final layer resized to ``num_classes``, plus a sigmoid.

    Args:
        num_classes: width of the replaced final linear layer.
    """

    def __init__(self, num_classes=294):
        super().__init__()
        self.num_classes = num_classes

        # Pretrained torchvision ResNet-18 whose `fc` layer is swapped for a
        # fresh linear layer with `num_classes` outputs.
        backbone = torchvision.models.resnet18(pretrained=True, progress=True)
        backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
        self.Model_Backend = backbone

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run the backbone on ``x`` and squash its outputs with a sigmoid."""
        return self.sigmoid(self.Model_Backend(x))

In [12]:
# Settings consumed by the training loop further down.
saving_steps = 200  # a checkpoint is written when the step index is a multiple of this
epoch = 0           # appears in the checkpoint file name
batch_size = 12     # number of dataset items gathered per optimisation step
last = 672          # step index at which the training loop starts

In [13]:
# Alternative starting point: build the model from scratch instead of loading it.
# AttrM = AttrModel()

# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source.
AttrM = torch.load('models/Base.pt')

conf = CONFIG()
Attr_dataset = Attr_Dataset(config=conf, df=filtered_df)

# NOTE(review): the training loop in the cells visible here indexes
# `Attr_dataset` directly and never iterates this loader; its batch size (8)
# also differs from the `batch_size` setting - confirm which one is intended.
Attr_data_loader = torch.utils.data.DataLoader(
    dataset=Attr_dataset,
    batch_size=8,
    shuffle=True,
)

In [14]:
# Binary cross-entropy on the model's sigmoid outputs.
criterion = nn.BCELoss()

# Optimise only the parameters that require gradients.
params = [p for p in AttrM.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,momentum=0.9, weight_decay=0.0005)

# Prefer the second GPU as before, but fall back to the first GPU or the CPU
# so the notebook still runs on hosts that have fewer than two CUDA devices.
# Be aware that the CPU fallback will be very slow for training.
if torch.cuda.device_count() > 1:
    DEVICE = torch.device('cuda:1')
elif torch.cuda.is_available():
    DEVICE = torch.device('cuda:0')
else:
    DEVICE = torch.device('cpu')

# Assigning to `_` keeps the module repr out of the cell output.
_ = AttrM.to(DEVICE)
_ = AttrM.train()

In [None]:

# Manual training loop: step `i` consumes dataset items
# [i*batch_size, (i+1)*batch_size), starting at step `last`. Items past the
# final full batch are not visited.
loss_tracking=0
for i in tqdm(range(last, len(Attr_dataset)//batch_size, 1)):
    # Fetch this step's samples straight from the dataset, in index order.
    samples = [Attr_dataset[i*batch_size+j] for j in range(batch_size)]
    images = [sample[0] for sample in samples]
    targets = [sample[1] for sample in samples]

    # Stack into one ndarray before converting: building a tensor from a
    # Python list of ndarrays is a much slower path in PyTorch.
    images = torch.as_tensor(np.stack(images), dtype=torch.float32).to(DEVICE)
    targets = torch.as_tensor(np.stack(targets), dtype=torch.float32).to(DEVICE)

    out = AttrM(images)
    loss =  criterion(out, targets)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print('loss at step ', i, 'is', loss.item())
    loss_tracking+=loss.item()

    if i%saving_steps==0:
        print('================================  Model Saved  ================================')
        # The file name carries the mean loss over the steps run since `last`.
        torch.save(AttrM, 'models/Attr_epoch_'+str(epoch)+'_step_'+str(i)+'_Loss_'+str(loss_tracking/(i+1-last)))
        torch.cuda.empty_cache()
        gc.collect()


HBox(children=(IntProgress(value=0, max=19013), HTML(value='')))

loss at step  672 is 0.05351759493350983
loss at step  673 is 0.05278264358639717
loss at step  674 is 0.0606030598282814
loss at step  675 is 0.048182904720306396
loss at step  676 is 0.06026555597782135
loss at step  677 is 0.04453951492905617
loss at step  678 is 0.0445040799677372
loss at step  679 is 0.0501612164080143
loss at step  680 is 0.050962407141923904
loss at step  681 is 0.056361038237810135
loss at step  682 is 0.05171194300055504
loss at step  683 is 0.049835916608572006
loss at step  684 is 0.054386772215366364
loss at step  685 is 0.05372697114944458
loss at step  686 is 0.050018180161714554
loss at step  687 is 0.061788804829120636
loss at step  688 is 0.04037660360336304
loss at step  689 is 0.056798990815877914
loss at step  690 is 0.04131759703159332
loss at step  691 is 0.05348939076066017
loss at step  692 is 0.04956731200218201
loss at step  693 is 0.04834343120455742
loss at step  694 is 0.04926472529768944
loss at step  695 is 0.056187793612480164
loss at st

In [None]:
o