In [1]:
import shutil

import numpy as np
import pandas as pd
import torch
from timm import create_model

In [2]:
import sys
import os
# Add the bundled timm source tree under ../input to the import search path.
# NOTE(review): `from timm import create_model` already ran in the first cell, so this
# entry only affects imports executed after this point — confirm the intended cell order.
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

In [3]:
# Home directory of the current user, with a trailing separator so that both
# `Path(USER_PATH) / ...` and plain string concatenation keep working.
# Resolved at run time instead of hard-coding one developer's home directory, so the
# notebook also runs on machines where the account is not `dmitry`.
USER_PATH = os.path.join(os.path.expanduser('~'), '')

In [4]:
from fastai.vision.all import (
    Path,
    set_seed,
    ImageDataLoaders,
    RegressionBlock,
    Resize,
    setup_aug_tfms,
    Brightness,
    Contrast,
    Hue,
    Saturation,
    Learner,
    MSELossFlat,
)

In [5]:
SEED = 0

# Seed through fastai's helper and, explicitly, through torch (CPU and CUDA).
set_seed(SEED, reproducible=True)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# cuBLAS requires a fixed workspace configuration for deterministic results on
# CUDA >= 10.2; setdefault() keeps any value that was already exported.
os.environ.setdefault('CUBLAS_WORKSPACE_CONFIG', ':4096:8')

# `torch.use_deterministic_algorithms` is a function: assigning `True` to it merely
# replaced the attribute and never switched deterministic mode on, so call it.
try:
    # warn_only: operations without a deterministic implementation emit a warning
    # instead of raising, so training can still run.
    torch.use_deterministic_algorithms(True, warn_only=True)
except TypeError:
    # Older torch releases do not accept `warn_only`; fall back to strict mode.
    torch.use_deterministic_algorithms(True)


BATCH_SIZE = 24 # 1080Ti only has 11Gb, so batch size of 24 is around maximum

In [6]:
# Root directory of the competition data; train.csv, test.csv and the train/ and test/
# image folders are all resolved relative to it in the following cells.
dataset_path = Path('../input/petfinder-pawpularity-score/')

In [7]:
# Load the train and test metadata tables from the dataset directory.
train_df, test_df = (
    pd.read_csv(dataset_path / f'{split}.csv') for split in ('train', 'test')
)

In [8]:
# Full image path for every row: <dataset_path>/<split>/<Id>.jpg
train_df['path'] = train_df['Id'].map(lambda image_id: f"{dataset_path / 'train' / image_id}.jpg")
test_df['path'] = test_df['Id'].map(lambda image_id: f"{dataset_path / 'test' / image_id}.jpg")

# Only the training frame loses its Id column; test_df keeps it for the submission.
train_df = train_df.drop('Id', axis=1)

train_df.shape

(7929, 14)

In [9]:
# Divide the Pawpularity target by 100. `norm_score` is the label column used for
# training; predictions are multiplied back by 100 when the submission is built.
train_df['norm_score'] = train_df['Pawpularity'] / 100

In [10]:
# Torch hub checkpoint directory under USER_PATH; the pretrained weights file is
# copied here by the next cell.
model_path = Path(USER_PATH) / '.cache/torch/hub/checkpoints/'

In [11]:
# Needed to copy model weights with no internet from local Kaggle dataset.
# Done in pure Python rather than `!cp`, so a missing source file is reported
# explicitly and the cell does not depend on a `cp` binary being available.
weights_name = 'swin_large_patch4_window7_224_22kto1k.pth'
weights_src = os.path.join('../input/swin-transformer', weights_name)

# exist_ok replaces the separate existence check (no gap between check and create).
os.makedirs(model_path, exist_ok=True)

if os.path.isfile(weights_src):
    shutil.copy(weights_src, os.path.join(model_path, weights_name))
else:
    # Best effort, like the original `cp`: report the problem and carry on, so a
    # checkpoint that is already in place can still be used.
    print(f"Weights not found at '{weights_src}'; skipping copy.")

cp: cannot stat '../input/swin-transformer/swin_large_patch4_window7_224_22kto1k.pth': No such file or directory


In [12]:
def petfinder_rmse(input,target):
    """Root-mean-squared error between predictions and targets, scaled by 100.

    Args:
        input: Tensor of model predictions; flattened before comparison.
        target: Tensor of ground-truth values with the same number of elements
            as `input`; flattened before comparison.

    Returns:
        Scalar tensor equal to 100 * sqrt(mean squared error).
    """
    # `F` is not imported by any visible cell, so the original raised NameError as soon
    # as the metric was evaluated; the functional API is reached through `torch` instead.
    # Both tensors are flattened so (N, 1) and (N,) inputs are compared element-wise
    # rather than being broadcast against each other.
    return 100 * torch.sqrt(torch.nn.functional.mse_loss(input.flatten(), target.flatten()))

In [13]:
def get_data(train_df):
    """Build fastai image dataloaders for score regression from `train_df`.

    Image file names come from the `path` column and labels from `norm_score`.
    `valid_pct=0` is passed, so no rows are held out for validation.

    Args:
        train_df: DataFrame with `path` and `norm_score` columns.

    Returns:
        The `ImageDataLoaders` object created by `ImageDataLoaders.from_df`.
    """
    frame = train_df.copy()  # work on a copy rather than the caller's frame
    colour_augs = setup_aug_tfms([Brightness(), Contrast(), Hue(), Saturation()])

    return ImageDataLoaders.from_df(
        frame,
        fn_col='path',
        label_col='norm_score',
        y_block=RegressionBlock,
        valid_pct=0,
        seed=SEED,
        bs=BATCH_SIZE,
        num_workers=4,
        item_tfms=Resize(224),
        batch_tfms=colour_augs,
    )


In [14]:
from torch.nn import MSELoss

def get_learner(train_df):
    """Create a half-precision fastai Learner around a pretrained Swin model.

    Args:
        train_df: Training frame, forwarded to `get_data`.

    Returns:
        A `Learner` using `MSELossFlat` as loss and `petfinder_rmse` as metric,
        converted with `to_fp16()`.
    """
    dls = get_data(train_df)
    # The output size of the model head is taken from the dataloaders (`dls.c`).
    backbone = create_model(
        'swin_large_patch4_window7_224',
        pretrained=True,
        num_classes=dls.c,
    )
    return Learner(
        dls,
        backbone,
        loss_func=MSELossFlat(),
        metrics=petfinder_rmse,
    ).to_fp16()

In [15]:
import gc

In [16]:
# Build the learner (dataloaders + pretrained model) from the training frame.
learn = get_learner(train_df)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [17]:
# NOTE(review): the Learner is created without an explicit splitter, so `freeze_to(-2)`
# may not actually freeze any layers — confirm the parameter groups.
learn.freeze_to(-2)
# One epoch of one-cycle training with a learning rate of 1e-4.
learn.fit_one_cycle(1, 1e-4) 
# Switch the learner back from fp16 to fp32 before running inference.
learn = learn.to_fp32()

epoch,train_loss,valid_loss,petfinder_rmse,time
0,0.032697,,,06:33


  warn("Your generator is empty.")


In [18]:
# Build a test dataloader for test_df from the training dataloaders.
test_dl = learn.dls.test_dl(test_df)

In [19]:
# Predict on the test dataloader; the second returned value is not needed and is discarded.
preds, _ = learn.get_preds(dl=test_dl)

In [21]:
# Start the submission frame from the test Ids (copied so test_df is not modified).
submission = test_df[['Id']].copy()

In [22]:
# Convert predictions to a NumPy array, clip them to [0, 1], undo the /100
# normalisation of the target and write the submission file.
raw_score = preds.float().numpy()
submission['Pawpularity'] = raw_score.clip(min=0, max=1) * 100
submission.to_csv('submission.csv', index=False)

In [23]:
# Sanity check: read the written file back and show its first rows.
pd.read_csv('submission.csv').head()

Unnamed: 0,Id,Pawpularity
0,ee51b99832f1ba868f646df93d2b6b81,56.451694
1,caddfb3f8bff9c4b95dbe022018eea21,43.013252
2,582eeabd4a448a53ebb79995888a4b0b,38.24806
3,afc1ad7f0c5eea880759d09e77f7deee,29.182768
4,d5bdf3446e86ce4ec67ce7a00f1cccc2,29.313274


# Local run score

In [24]:
from sklearn.metrics import mean_squared_error 
# RMSE (square root of the MSE) between the submitted scores and the Pawpularity
# column of test_df, which carries labels in this local run.
mean_squared_error(submission['Pawpularity'], test_df['Pawpularity']) ** 0.5

18.038510297244233