In [1]:
import sys
import numpy as np

import pandas as pd
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
# Silence every warning category for the rest of the session
# (this also hides deprecation notices from the libraries imported here).
warnings.filterwarnings('ignore')
# Render matplotlib figures inline in the notebook, using the SVG backend format.
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
# Apply the ggplot style first, then override the font family on top of it.
plt.style.use('ggplot')
plt.rcParams["font.family"] = "Times New Roman"

In [2]:
import torch
import torchvision
from torchvision import models
from torch import nn
import torch.nn.functional as F
from torch import optim
from tqdm import tqdm_notebook
import torchvision.transforms as T
from sklearn.model_selection import train_test_split
import time
import cv2
from PIL import Image
from sklearn.metrics import mean_squared_error as mse
from sklearn.preprocessing import MinMaxScaler
import timm
import tez
import os
import glob

In [3]:
# Fall back to the CPU only when no CUDA device is available; otherwise use GPU 0.
device = torch.device("cpu" if not torch.cuda.is_available() else "cuda:0")
print(device)

cuda:0


In [4]:
# Run configuration: timm backbone identifier, square input resolution,
# training batch size and the root directory of the competition data.
model_name = 'swin_large_patch4_window12_384'
IMG_SIZE = 384
BATCH_SIZE = 12
PATH = '/kaggle/input/petfinder-pawpularity-score/'

In [5]:
# Test table from the competition directory, and the training table that
# carries a `kfold` column used below to pick the validation fold.
df_test = pd.read_csv(PATH + 'test.csv')
df_train = pd.read_csv('../input/train-data-10/10FOLDSTRAIN.csv')
df_train.head(n=2)

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,Pawpularity,kfold
0,0007de18844b0dbbb5e1f607da0606e0,0,1,1,1,0,0,1,0,0,0,0,0,63,1
1,0009c66b9439883ba2750fb825e1d7db,0,1,1,0,0,0,0,0,0,0,0,0,42,5


In [9]:
# Value of the `kfold` column that is held out for validation in this run.
FOLD = 0

In [10]:
# Rows whose fold label equals FOLD form the validation set; all others train.
X_valid = df_train[df_train['kfold'].eq(FOLD)]
X_train = df_train[df_train['kfold'].ne(FOLD)]
print(len(X_train), len(X_valid))

8870 988


In [11]:
# Both pipelines are currently identical: array -> PIL image -> square resize
# -> tensor -> per-channel normalisation. No random augmentation is active
# for training (the horizontal flip is disabled).
augm_train = T.Compose(
    [
        T.ToPILImage(),
        T.Resize([IMG_SIZE, IMG_SIZE]),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

augm_test = T.Compose(
    [
        T.ToPILImage(),
        T.Resize([IMG_SIZE, IMG_SIZE]),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [12]:
class PawDataset(torch.utils.data.Dataset):
    """Dataset yielding ``(image, features, target)`` for one row of a table.

    Columns are addressed by position: column 0 supplies the image file stem,
    the last column supplies the target, and every column in between is read
    as a numeric feature.
    """

    def __init__(self, df, image_path='train/', augm=False):
        """
        Args:
            df: table laid out as described on the class; a copy is stored.
            image_path: sub-directory (relative to the module-level ``PATH``)
                that contains the ``.jpg`` files.
            augm: callable applied to the RGB ``numpy`` image. A non-callable
                value (the default ``False``) disables the transform and the
                raw RGB array is returned instead.
        """
        self.df = df.copy()
        self.augm = augm
        self.img_path = PATH + image_path

    def __len__(self):
        """Number of rows in the stored table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(image, features, target)`` where ``features`` and ``target``
            are float32 tensors and ``target`` is the last column divided
            by 100.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        file_name = self.img_path + str(self.df.iloc[idx, 0]) + '.jpg'
        image = cv2.imread(file_name)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail here with the path instead of inside cvtColor.
            raise FileNotFoundError(f'Cannot read image: {file_name}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if callable(self.augm):
            image = self.augm(image)

        features = torch.FloatTensor(self.df.iloc[idx, 1:-1].values.astype('float32'))
        target = torch.FloatTensor(self.df.iloc[idx, -1:].values.astype('float32'))

        return image, features, target/100

In [13]:
# The fold label is dropped before the train/validation frames are handed to
# the dataset; the test frame is passed through unchanged.
train_data = PawDataset(df=X_train.drop(columns=['kfold']), image_path='train/', augm=augm_train)
valid_data = PawDataset(df=X_valid.drop(columns=['kfold']), image_path='train/', augm=augm_test)
test_data = PawDataset(df=df_test, image_path='test/', augm=augm_test)

In [14]:
# Only the training loader shuffles and drops the incomplete last batch;
# the validation and test loaders keep row order and use a 4x larger batch.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    num_workers=8,
)
valid_loader = torch.utils.data.DataLoader(
    dataset=valid_data,
    batch_size=BATCH_SIZE*4,
    shuffle=False,
    num_workers=8,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=BATCH_SIZE*4,
    shuffle=False,
    num_workers=8,
)

In [15]:
# Pretrained backbone selected by `model_name`, configured for 3-channel input.
model_ = timm.create_model(model_name, in_chans=3, pretrained=True)

Downloading: "https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22kto1k.pth" to /root/.cache/torch/hub/checkpoints/swin_large_patch4_window12_384_22kto1k.pth


In [16]:
# Freeze the pretrained backbone: none of its existing weights get gradients.
for param in model_.parameters():
    param.requires_grad = False

# Replace the classifier with a fresh 1536 -> 128 projection. It is created
# after the freeze loop, so its parameters are trainable.
model_.head = nn.Linear(1536, 128)
# Make the intent explicit on the parameters themselves. Assigning
# `model_.head.requires_grad = True` would only set an attribute on the
# Module object and leave the parameters untouched.
model_.head.requires_grad_(True)

In [17]:
class MyNetFeat(tez.Model):
    """Regression head on top of an image backbone plus tabular features.

    The backbone output is concatenated with ``features`` along dim 1 and fed
    through ``Linear(147, 72) -> ReLU -> Linear(72, 1)``.
    """

    def __init__(self, base_model):
        super().__init__()
        self.model = base_model
        # Registered for completeness; `forward` does not apply it.
        self.dropout2 = nn.Dropout(p=0.1)
        self.dense1 = nn.Linear(in_features=147, out_features=72)
        self.dense2 = nn.Linear(in_features=72, out_features=1)

    def forward(self, image, features, targets=None):
        """Return ``(prediction, fused)``.

        ``fused`` is the concatenated backbone-output/feature tensor that was
        fed to the first dense layer. ``targets`` is accepted but not used.
        """
        embedding = self.model(image)
        fused = torch.cat((embedding, features), dim=1)
        hidden = F.relu(self.dense1(fused))
        prediction = self.dense2(hidden)
        return prediction, fused

In [18]:
# net = torch.load('../input/my-img-models/img_models/only_img_model_0_1785.pht')

In [19]:
# Wrap the backbone in the fusion head and move the whole network to `device`.
# The bare expression on the last line displays the module structure.
net2 = MyNetFeat(base_model=model_)
net2 = net2.to(device)
net2

MyNetFeat(
  (model): SwinTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 192, kernel_size=(4, 4), stride=(4, 4))
      (norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (0): BasicLayer(
        dim=192, input_resolution=(96, 96), depth=2
        (blocks): ModuleList(
          (0): SwinTransformerBlock(
            (norm1): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
            (attn): WindowAttention(
              (qkv): Linear(in_features=192, out_features=576, bias=True)
              (attn_drop): Dropout(p=0.0, inplace=False)
              (proj): Linear(in_features=192, out_features=192, bias=True)
              (proj_drop): Dropout(p=0.0, inplace=False)
              (softmax): Softmax(dim=-1)
            )
            (drop_path): Identity()
            (norm2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
            (mlp): Mlp(
             

In [20]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))`` computed with NumPy."""
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

def rmse_func(y_pred, y_true):
    """Root-mean-squared error after rescaling both inputs by 100.

    ``y_pred`` is passed through ``sigmoid`` before scaling; ``y_true`` is
    scaled as-is.
    """
    target_scaled = y_true * 100
    pred_scaled = 100 * sigmoid(y_pred)
    squared_error = mse(target_scaled, pred_scaled)
    return np.sqrt(squared_error)

In [21]:
def save_model(model, mod_path, prefix, metric, verbose=True):
    """Persist ``model`` as the only checkpoint for ``mod_path + prefix``.

    The file is named ``<mod_path><prefix><round(metric*100)>.pht``. The new
    checkpoint is written to a temporary file first; files that start with
    ``mod_path + prefix`` are removed only after that write succeeded, so a
    failed save never destroys the previous checkpoint.

    Args:
        model: object passed to ``torch.save``.
        mod_path: directory part of the path (concatenated as-is, so it should
            end with a path separator).
        prefix: file-name prefix identifying this family of checkpoints.
        metric: score encoded in the file name and echoed when ``verbose``.
        verbose: print a confirmation line after saving.

    Raises:
        Whatever ``torch.save`` raises; the temporary file is cleaned up first.
    """
    path_1 = mod_path + prefix
    full_path = path_1 + str(round(metric*100)) + '.pht'
    tmp_path = full_path + '.tmp'

    try:
        torch.save(model, tmp_path)
    except BaseException:
        # Do not leave a truncated file behind, and keep the old checkpoint.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

    # Escape the prefix so characters such as '[' or '*' are matched literally.
    tmp_abs = os.path.abspath(tmp_path)
    for f in glob.glob(glob.escape(path_1) + "*"):
        if os.path.abspath(f) != tmp_abs:
            os.remove(f)
    os.replace(tmp_path, full_path)

    if verbose:
        print(f'Model saved with metric: {round(metric,2)}')

In [22]:
def model_eval(model, opt, loss_func, dataset_loader):
    """Evaluate ``model`` on every batch of ``dataset_loader``.

    The model is put into eval mode for the duration of the call and its
    previous train/eval mode is restored afterwards.

    Args:
        model: network called as ``model(images, features)`` and returning a
            2-tuple whose first element is the prediction.
        opt: unused; kept so existing call sites keep working.
        loss_func: loss applied to ``(pred, target)`` for each batch.
        dataset_loader: iterable of ``(images, features, target)`` batches.

    Returns:
        ``(test_loss, rmse)``: the per-batch loss averaged over batches and
        rounded to 4 decimals, and ``rmse_func`` applied to all predictions
        and targets.

    Raises:
        ValueError: if the loader yields no batches.
    """
    num_batches = len(dataset_loader)
    if num_batches == 0:
        raise ValueError('model_eval received an empty data loader')

    total_loss = 0.0
    target_chunks = []
    pred_chunks = []

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for X, features, y in dataset_loader:
                pred, _ = model(X.to(device), features.to(device))
                # Use the loss that was passed in, not a module-level global.
                total_loss += loss_func(pred, y.to(device)).item()
                target_chunks.append(y.cpu().numpy().ravel())
                pred_chunks.append(pred.cpu().numpy().ravel())
    finally:
        model.train(was_training)

    # One concatenation instead of re-allocating with np.append per batch;
    # float64 matches what appending to an empty default array produced.
    full_y = np.concatenate(target_chunks).astype(np.float64)
    full_pred = np.concatenate(pred_chunks).astype(np.float64)

    test_loss = round(total_loss / num_batches, 4)
    rmse = rmse_func(full_pred, full_y)

    return test_loss, rmse

In [23]:
def train_step(epoch, train_start_time, model, opt, loss_func, train_loader, step, best_metric):
    """Train for one epoch with periodic validation and checkpointing.

    Every ``step`` batches the model is evaluated on the module-level
    ``valid_loader``; when the RMSE improves on ``best_metric`` the model is
    saved via ``save_model``. A final evaluation runs at the end of the epoch
    and a summary line is printed.

    Args:
        epoch: zero-based epoch index (used for logging only).
        train_start_time: ``time.time()`` value taken when training started.
        model: network called as ``model(images, features)``.
        opt: optimizer stepped once per batch.
        loss_func: loss applied to ``(outputs, labels)``.
        train_loader: iterable of ``(images, features, labels)`` batches.
        step: number of batches between intermediate evaluations.
        best_metric: best RMSE seen so far at an intermediate evaluation.

    Returns:
        ``(rmse, best_metric)``: the end-of-epoch RMSE and the possibly
        updated best intermediate RMSE.
    """
    time_start = time.time()
    running_loss = 0
    num_batches_train = len(train_loader)

    # Make sure training-only layers are active regardless of the mode the
    # model was left in by earlier code.
    model.train()

    for num, data in enumerate(train_loader):
        images, features, labels = data[0].to(device), data[1].to(device), data[2].to(device)

        opt.zero_grad()  # reset accumulated gradients
        outputs, _ = model(images, features)  # forward pass
        loss = loss_func(outputs, labels)  # compute the loss
        loss.backward()  # back-propagation
        opt.step()  # update the weights

        running_loss += loss.item()

        if (num+1) % step == 0:
            test_loss, rmse = model_eval(model, opt, loss_func, valid_loader)
            model.train()  # training continues after the evaluation
            print(
                f'IT № {num+1}/{num_batches_train}, tr_loss= {round(running_loss/(num+1), 4)}, eval_loss= {test_loss},',
                f'rmse = {round(rmse,3)}'
            )
            if rmse < best_metric:
                best_metric = rmse
                save_model(model, mod_path='./', prefix=f'full_mod_{FOLD}_', metric=best_metric)

    test_loss, rmse = model_eval(model, opt, loss_func, valid_loader)
    # max(..., 1) avoids a ZeroDivisionError when the loader yields no batch.
    train_loss = round(running_loss / max(num_batches_train, 1), 4)

    time_taken = round(time.time() - time_start, 1)
    from_start = round(time.time() - train_start_time, 1)
    print(
        f'Epoch № {epoch+1}, tr_loss= {train_loss}, eval_loss= {test_loss},',
        f'rmse = {rmse}',
        f'\ntime_per_epoch = {time_taken} sec, total ---> {from_start} sec'
    )
    return rmse, best_metric

In [24]:
# Optimisation setup: Adam on the network's parameters with a learning rate
# that is halved after each of the first five epochs.
EPOCHES = 8
LR = 0.001
criterion = nn.BCEWithLogitsLoss()
opt = optim.Adam(params=net2.parameters(), lr=LR)
scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer=opt,
    milestones=list(range(1, 6)),
    gamma=0.5,
)

In [25]:
train_start = time.time()
rmse_history = []  # end-of-epoch validation RMSE, one entry per epoch
best_metric = 1000  # sentinel: any real RMSE is lower
step = 300  # batches between intermediate validations
for epoch in tqdm_notebook(range(EPOCHES)):
    print('*'*40, f'EPOCH № {epoch+1}', '*'*40)
    rmse, best_metric = train_step(epoch,
                                   train_start,
                                   net2,
                                   opt,
                                   criterion,
                                   train_loader,
                                   step,
                                   best_metric
                                   )

    # Validate more often once the best score gets good enough. This is the
    # same schedule as before, without the redundant nested branches.
    if best_metric < 18.6:
        step = 20
    elif best_metric < 20:
        step = 60

    rmse_history.append(rmse)
    scheduler.step()
print('*'*100)
# The best score can come from an intermediate evaluation (tracked in
# `best_metric`) as well as from an end-of-epoch one, so consider both.
best_score = min([best_metric, *rmse_history])
print(f'Training is finished! Best score: {best_score} Time taken, sec:',
      time.time()-train_start)

  0%|          | 0/8 [00:00<?, ?it/s]

**************************************** EPOCH № 1 ****************************************
IT № 300/739, tr_loss= 0.6521, eval_loss= 0.6476, rmse = 18.65
Model saved with metric: 18.65
IT № 600/739, tr_loss= 0.6492, eval_loss= 0.6428, rmse = 17.981
Model saved with metric: 17.98
Epoch № 1, tr_loss= 0.6484, eval_loss= 0.6459, rmse = 18.35956968602556 
time_per_epoch = 589.7 sec, total ---> 589.8 sec
**************************************** EPOCH № 2 ****************************************
IT № 20/739, tr_loss= 0.6421, eval_loss= 0.6441, rmse = 18.203
IT № 40/739, tr_loss= 0.6407, eval_loss= 0.6436, rmse = 18.123
IT № 60/739, tr_loss= 0.6397, eval_loss= 0.6466, rmse = 18.492
IT № 80/739, tr_loss= 0.6363, eval_loss= 0.6467, rmse = 18.428
IT № 100/739, tr_loss= 0.6366, eval_loss= 0.6447, rmse = 18.255
IT № 120/739, tr_loss= 0.6352, eval_loss= 0.6449, rmse = 18.3
IT № 140/739, tr_loss= 0.6362, eval_loss= 0.6442, rmse = 18.173
IT № 160/739, tr_loss= 0.6358, eval_loss= 0.6441, rmse = 18.184

In [29]:
def get_pred(model, loss_func, dataset_loader):
    """Collect raw model outputs for every batch of ``dataset_loader``.

    The model is put into eval mode for the duration of the call and its
    previous train/eval mode is restored afterwards.

    Args:
        model: network called as ``model(images, features)``. It may return
            either the prediction tensor or a tuple/list whose first element
            is the prediction.
        loss_func: unused; kept so existing call sites keep working. The
            score is computed with ``rmse_func``, the same metric that
            ``model_eval`` reports.
        dataset_loader: iterable of ``(images, features, target)`` batches;
            ``target`` may be ``None``.

    Returns:
        ``(full_pred, rmse)`` when targets were available, where ``full_pred``
        is the flat array of raw (pre-sigmoid) outputs and ``rmse`` is
        ``rmse_func`` over all batches rounded to 2 decimals; otherwise just
        ``full_pred``. An empty loader yields an empty array.
    """
    pred_chunks = []
    target_chunks = []

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for X, features, y in dataset_loader:
                output = model(X.to(device), features.to(device))
                # The model in this notebook returns (prediction, fused);
                # accept a bare tensor as well.
                pred = output[0] if isinstance(output, (tuple, list)) else output
                pred_chunks.append(pred.cpu().numpy().ravel())
                if y is not None:
                    target_chunks.append(y.cpu().numpy().ravel())
    finally:
        model.train(was_training)

    if pred_chunks:
        full_pred = np.concatenate(pred_chunks).astype(np.float64)
    else:
        full_pred = np.array([])

    if not target_chunks:
        return full_pred

    full_y = np.concatenate(target_chunks).astype(np.float64)
    rmse = round(rmse_func(full_pred, full_y), 2)
    return full_pred, rmse