In [1]:
import torch
import torch.nn as nn
from skimage import io, transform

class CustomDataset(torch.utils.data.Dataset):
    def __init__(self,image_path,features,targets,augmentations=None):
        self.image_path = image_path
        self.features = features
        self.targets = targets
        self.augmentations = augmentations
        
    def __len__(self):
        return len(self.image_path)
    
    def __getitem__(self,item):
        image = io.imread(self.image_path[item])
        features = self.features[item,:]
        targets = self.targets[item]
        
        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]
            
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        return {
            "image": torch.tensor(image, dtype=torch.float),
            "features": torch.tensor(features, dtype=torch.float),
            "targets": torch.tensor(targets, dtype=torch.float),
        }
        

In [2]:
! pip install timm

Collecting timm
  Downloading timm-0.4.12-py3-none-any.whl (376 kB)
[K     |████████████████████████████████| 376 kB 2.1 MB/s 
Installing collected packages: timm
Successfully installed timm-0.4.12


In [3]:
import timm

In [4]:
timm.list_models('*swin*')

['eca_swinnext26ts_256',
 'swin_base_patch4_window7_224',
 'swin_base_patch4_window7_224_in22k',
 'swin_base_patch4_window12_384',
 'swin_base_patch4_window12_384_in22k',
 'swin_large_patch4_window7_224',
 'swin_large_patch4_window7_224_in22k',
 'swin_large_patch4_window12_384',
 'swin_large_patch4_window12_384_in22k',
 'swin_small_patch4_window7_224',
 'swin_tiny_patch4_window7_224',
 'swinnet26t_256',
 'swinnet50ts_256']

In [5]:
# m = timm.create_model('swin_large_patch4_window12_384')
# m

In [6]:
def train(model,train_loader,device,optimizer):
    model.train()
    running_train_loss = 0.0
    for data in train_loader:
        inputs = data['image']
        features = data['features']
        targets = data['targets']

        inputs = inputs.to(device, dtype=torch.float)
        features = features.to(device,dtype=torch.float)
        targets = targets.to(device, dtype=torch.float)

        optimizer.zero_grad()
        outputs = model(inputs,features)
        loss = nn.BCEWithLogitsLoss()(outputs, targets.view(-1, 1))
        loss.backward()
        optimizer.step()
        running_train_loss +=loss.item()
        
    train_loss_value = running_train_loss/len(train_loader)
    print(f'train BCE loss is {train_loss_value}')
    
def eval(model,valid_loader,device,optimizer):
    model.eval()
    final_targets = []
    final_outputs = []
    running_val_loss = 0.0
    with torch.no_grad():
        for data in valid_loader:
            inputs = data['image']
            features = data['features']
            targets = data['targets']
            inputs = inputs.to(device, dtype=torch.float)
            features = features.to(device,dtype=torch.float)
            targets = targets.to(device, dtype=torch.float)

            output = model(inputs,features)
            running_val_loss += nn.BCEWithLogitsLoss()(output, targets.view(-1, 1))
            targets = (targets.detach().cpu().numpy()*100).tolist()
            output = (torch.sigmoid(output).detach().cpu().numpy()*100).tolist()
            final_outputs.extend(output)
            final_targets.extend(targets)
        val_loss = running_val_loss/len(valid_loader)    
        print(f'valid BCE loss is {val_loss}')
    return final_outputs,final_targets      
           

In [7]:
import torch
import torch.nn as nn

model_name = 'swin_base_patch4_window7_224_in22k'

out_dim    = 1

class get_model(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=True)
        self.model.head = nn.Sequential(nn.Linear(self.model.head.in_features,768),
                                              nn.Linear(768,256))
        self.last = nn.Linear(256 + 12, 128)
        self.depth1 = nn.Linear(128,64)
        self.depth2 = nn.Linear(64,1)
    def forward(self, image, features):
        x = self.model(image)
        x = self.last(torch.cat([x, features], dim=1))
        x = self.depth1(x)
        x = self.depth2(x)
        return x
    
    

In [8]:
import pandas as pd
import numpy as np
from sklearn import model_selection
df = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')
df["kfold"] = -1

df = df.sample(frac=1).reset_index(drop=True)

kf = model_selection.StratifiedKFold(n_splits=5, shuffle=False)

for fold, (train_idx, val_idx) in enumerate(kf.split(X=df,y=df.Pawpularity.values)):
    print(len(train_idx), len(val_idx))
    df.loc[val_idx, 'kfold'] = fold

7929 1983
7929 1983
7930 1982
7930 1982
7930 1982




In [9]:
import os
from sklearn import metrics
import albumentations
device = 'cuda'
epochs = 16
data_path = '../input/petfinder-pawpularity-score'
train_aug = albumentations.Compose(                  ##  AUGMENTATIONs TAKEN FROM ABHISHEK THAKUR's tez Pawpular training
    [
        albumentations.Resize(224,224, p=1),
        albumentations.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
#         albumentations.RandomBrightnessContrast(
#             brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5
#         ),
#         albumentations.HorizontalFlip(p=0.4),         ##  THis part is from  Manav  check out his NB
#          albumentations.VerticalFlip(p=0.3),
#         albumentations.ShiftScaleRotate(
#                 shift_limit = 0.1, scale_limit=0.1, rotate_limit=45, p=0.5
#             ),
    ],
    p=1.0,
)

valid_aug = albumentations.Compose(
    [
        albumentations.Resize(224, 224, p=1),
        albumentations.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
    ],
    p=1.0,
)

In [10]:
feats = [
    'Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
    'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur'
]

In [11]:
from itertools import chain
scores = []
for fold in range(5):
        model = get_model()
        model.to(device)
        df_train = df[df.kfold != fold].reset_index(drop=True)
        df_valid = df[df.kfold == fold].reset_index(drop=True)

        df_train = df_train.drop(columns = 'kfold')
        df_valid = df_valid.drop(columns = 'kfold')

        train_images = df_train.Id.values.tolist()
        train_images = [os.path.join(data_path,'train',i + '.jpg') for i in train_images]
        valid_images = df_valid.Id.values.tolist()
        valid_images = [os.path.join(data_path,'train',i + '.jpg') for i in valid_images]

        train_targets = df_train.Pawpularity.values/100
        valid_targets = df_valid.Pawpularity.values/100

        train_dataset = CustomDataset(image_path = train_images,features=df_train[feats].values,targets = train_targets,augmentations=train_aug)
        train_loader = torch.utils.data.DataLoader(train_dataset,batch_size=32,shuffle=True,pin_memory=True) 
        valid_dataset = CustomDataset(image_path = valid_images,features=df_valid[feats].values,targets =valid_targets,augmentations=valid_aug)
        valid_loader = torch.utils.data.DataLoader(valid_dataset,batch_size=32,shuffle=False,pin_memory=True) 

        optimizer = torch.optim.Adam(model.parameters(),lr=5e-5)
#         scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,mode='min',patience=5,verbose=True)
        print(f'============================== FOLD -- {fold} ==============================')
        for epoch in range(epochs):
            print(f'==================== Epoch -- {epoch} ====================')
            train(model=model,train_loader=train_loader,device=device,optimizer=optimizer)
            
            final_outputs,final_targets = eval(model=model,valid_loader=valid_loader,device=device,optimizer=optimizer)
    
            RMSE = np.sqrt(metrics.mean_squared_error(final_targets,final_outputs))
#             scheduler.step(RMSE)
            
            print(f'valid RMSE={RMSE}')
        torch.save(model.state_dict(),'model-epoch'+str(fold)+'.pth')
        scores.append(RMSE)

Downloading: "https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth" to /root/.cache/torch/hub/checkpoints/swin_base_patch4_window7_224_22k.pth


train BCE loss is 0.6494753473708706
valid BCE loss is 0.6427710652351379
valid RMSE=17.96771532711198
train BCE loss is 0.630789284504229
valid BCE loss is 0.643142819404602
valid RMSE=18.010045121058276
train BCE loss is 0.6073895849047168
valid BCE loss is 0.6565539836883545
valid RMSE=19.287680773416344
train BCE loss is 0.5899396062858643
valid BCE loss is 0.6522985100746155
valid RMSE=18.81821174539346
train BCE loss is 0.5809608534459145
valid BCE loss is 0.6526400446891785
valid RMSE=18.83853839221165
train BCE loss is 0.5762392786001006
valid BCE loss is 0.6506486535072327
valid RMSE=18.64052960362499
train BCE loss is 0.573779515441387
valid BCE loss is 0.6542401313781738
valid RMSE=18.880449331056294
train BCE loss is 0.5724847757768247
valid BCE loss is 0.6521443128585815
valid RMSE=18.707192878769536
train BCE loss is 0.5714007388920553
valid BCE loss is 0.6507833003997803
valid RMSE=18.47729332079907
train BCE loss is 0.5708406627658875
valid BCE loss is 0.647973060607910

In [12]:
print(scores)

[18.288677347008566, 18.445468229849798, 18.859111737442706, 18.207405081589382, 18.210748097075292]


In [13]:
# from itertools import chain
# max = 200
# model_no = 0
# for i in range(5):
#     if score[i] < max:
#         model_no = i
#         max = scores[i]
        
        
# model_f = get_model()
# model_f.to(device)
# model_f.load_state_dict(torch.load('./model-epoch'+str(model_no)+'.pth'))
# data_path = '../input/petfinder-pawpularity-score'
# device = 'cuda'
# df_test = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')
# test_images = df_test.Id.values.tolist()
# test_images = [os.path.join(data_path,'test',i + '.jpg') for i in test_images]

# test_dataset =  CustomDataset(image_path = test_images,targets = np.ones(len(test_images)),augmentations=valid_aug)
# test_loader = torch.utils.data.DataLoader(test_dataset,batch_size=64,shuffle=False) 


# final_outputs = []
 
# with torch.no_grad():
#     for data in test_loader:
#         inputs = data['image']
#         inputs = inputs.to(device, dtype=torch.float)
#         output = model_f(inputs)
#         output = output.detach().cpu().numpy().tolist()
#         final_outputs.extend(output)
        

# final_outputs = list(chain.from_iterable(final_outputs))        
# submission = pd.read_csv('../input/petfinder-pawpularity-score/sample_submission.csv')
# submission['Pawpularity'] = final_outputs
# submission.to_csv('submission.csv',index = False)