# Inference

Because this competition has a hidden test set, we need to set up an inference notebook in order to submit our models.

# Prerequisite 

The following additional data have to be added as Kaggle datasets:

- Petfinder data (test data is unavailable)
- attrdictw wheels (../input/attrdictw/attrdict-2.0.1-py2.py3-none-any.whl)
- pretrained model `.pth` weights (needed because internet access is not available; import an existing timm-pretrained-weights dataset)
- timmmaster (for timm)
- add model weights
- add utility scripts

# Load libraries 

No internet-based `pip install` is needed, because the packages are installed from the datasets listed above.

In [1]:
! pip install ../input/attrdictw/attrdict-2.0.1-py2.py3-none-any.whl

In [2]:
import sys
# make the timm sources from the attached "timmmaster" dataset importable
sys.path.append("../input/timmmaster/")

In [3]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import random
import matplotlib.pyplot as plt
import os
import tqdm

import seaborn as sns
from torchvision.io import read_image
import torchvision.transforms as T
from torchvision.utils import make_grid
from attrdict import AttrDict
import torch
import yaml
from sklearn.model_selection import StratifiedKFold
import copy
import pickle
# from tqdm import tqdm_notebook

# additional lightning 

import pytorch_lightning as pl
from pytorch_lightning.utilities.seed import seed_everything
from pytorch_lightning import callbacks
from pytorch_lightning.callbacks.progress import ProgressBarBase
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning import LightningDataModule, LightningModule


# pytorch
import torch
from torch import nn
import torch.nn.functional as F
from timm import create_model


from pawnet_utility import *

# Load test data 

In [4]:
def inference_test(model, valid_loader, criterion, device="cpu"):
    """
    Run ``model`` over every batch of ``valid_loader`` and collect predictions.

    Each batch must unpack as ``(x, y)``. The raw model output is passed
    through a sigmoid and multiplied by 100 before it is collected. For the
    hidden test set there are no real labels, so ``criterion`` is normally
    ``None`` and ``y`` is ignored.

    Args:
        model: module to evaluate; it is switched to eval mode here but is
            NOT moved to ``device`` (the caller is responsible for that).
        valid_loader: iterable yielding ``(x, y)`` batches.
        criterion: optional callable ``criterion(pred, y)`` returning a
            one-element tensor. It receives the sigmoid-scaled predictions
            (0-100), not the raw model outputs. When ``None`` no loss is
            computed and the labels are never touched.
        device: device the inputs are moved to before the forward pass.

    Returns:
        Tuple ``(valid_loss, y_pred_valid)``: ``valid_loss`` is a float, or
        ``None`` when ``criterion`` is ``None``; ``y_pred_valid`` is a CPU
        tensor with the predictions of all batches concatenated along dim 0.
    """
    model.eval()
    compute_loss = criterion is not None
    label_batches = []
    pred_batches = []
    # gradients are never needed here, so disable them once for the whole loop
    with torch.no_grad():
        for x, y in valid_loader:
            pred = torch.sigmoid(model(x.to(device))) * 100.
            # Drop singleton dimensions but always keep the batch axis. A
            # plain ``squeeze()`` turns a batch of size 1 into a 0-d tensor,
            # which ``torch.cat`` refuses to concatenate.
            kept_dims = [size for size in pred.shape[1:] if size != 1]
            pred = pred.reshape(pred.shape[0], *kept_dims)
            pred_batches.append(pred.cpu())
            if compute_loss:
                label_batches.append(y.detach().cpu())

    # convert from list of per-batch tensors to a single tensor
    y_pred_valid = torch.cat(pred_batches, 0)
    if not compute_loss:
        return None, y_pred_valid

    y_valid = torch.cat(label_batches, 0)
    valid_loss = criterion(y_pred_valid, y_valid).item()
    return valid_loss, y_pred_valid

In [5]:
# Kaggle-specific setup: the configuration is read from the attached "config"
# dataset. When running against GCS instead, point the paths at our GCP bucket
# (e.g. the dataset cache location from KaggleDatasets().get_gcs_path()).
base_config_manager = BaseConfigLoader("../input/config/config.yaml")

# configuration entry for the swin_tiny4_w7_224 model
model_config = base_config_manager.load_config().model.swin_tiny4_w7_224

# root folder of the competition data, e.g. "/kaggle/input/petfinder-pawpularity-score/"
file_path = base_config_manager.load_config().filepath.kaggle

# table of the test samples
test_csv = os.path.join(file_path, "test.csv")
test_df = pd.read_csv(test_csv)

In [6]:
# Preprocessing applied to every test image: resize to 224x224 (ImageNet
# models need at least 224), convert to float and normalise with the
# ImageNet channel statistics.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

_test_steps = [
    T.Resize([224, 224]),
    T.ConvertImageDtype(torch.float),
    T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
]
test_transformation = T.Compose(_test_steps)

test_data = pawnetDataset(
    annotation_df=test_df,
    img_dir=os.path.join(file_path, "test"),
    transform=test_transformation,
    test=True,
)
# shuffle=False: batches follow the dataset's own order (the submission later
# pairs the predictions with the rows of test_df)
test_loader = torch.utils.data.DataLoader(
    test_data,
    batch_size=64,
    shuffle=False,
    num_workers=2,
)

# Load weights and inference

https://pytorch-lightning.readthedocs.io/en/latest/common/weights_loading.html

In [7]:
# Loss object handed to the Lightning module when the checkpoints are
# restored in the cells below.
criterion = nn.BCEWithLogitsLoss()

In [8]:
# First attempt at restoring a checkpoint. It is expected to fail here because
# the session has no internet access, so the failure is reported instead of
# stopping the notebook.
# NOTE(review): presumably this failed attempt also creates the torch hub
# cache directory that the pretrained weights are copied into afterwards --
# confirm before removing this cell.
try:
    model = pawNetBasic.load_from_checkpoint(
        checkpoint_path="../input/swinmixup04weights/pawnet_lightning_swin_tiny4_w7_224_mixup04/default/version_1/checkpoints/best_loss.ckpt",
        criterion=criterion,
        model_config=model_config,
    )
except Exception as err:
    # `Exception` instead of a bare `except:` so that KeyboardInterrupt and
    # SystemExit still propagate; the real cause is shown for debugging.
    print("no internet... cannot download weights... ")
    print(f"(load_from_checkpoint failed with {type(err).__name__}: {err})")

In [9]:
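# copy the pretrained swin_tiny_patch4_window7_224 weights into the torch hub checkpoint cache (they cannot be downloaded without internet)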

! cp ../input/swin-tiny-patch-original-weights/swin_tiny_patch4_window7_224.pth /root/.cache/torch/hub/checkpoints/

In [11]:

# predict
# Sum the predictions of the 5 fold checkpoints into pred_all; the submission
# cell divides by the same fold count to obtain the ensemble average.

# Use the GPU when one is attached, otherwise fall back to the CPU instead of
# crashing on a hard-coded "cuda".
inference_device = "cuda" if torch.cuda.is_available() else "cpu"

# one accumulator slot per test row
pred_all = np.zeros(len(test_df))

# loop over folds
for i in range(5):
    print(f"Loading fold {i} weights and perform inference") #
    model = pawNetBasic.load_from_checkpoint(
        checkpoint_path=f"../input/swinmixup04weights/pawnet_lightning_swin_tiny4_w7_224_mixup04/default/version_{i}/checkpoints/best_loss.ckpt",
        criterion=criterion,
        model_config=model_config,
    )
    model = model.to(inference_device)
    # criterion=None: the test set has no labels, only predictions are needed
    _, pred = inference_test(model, test_loader, criterion=None, device=inference_device)
    pred_all += pred.numpy()

In [12]:
# create submission
# .copy() makes `sub` an independent frame, so adding a column below does not
# trigger pandas' SettingWithCopyWarning on a selection of test_df.
sub = test_df[["Id"]].copy()
# average of the 5 fold predictions accumulated in pred_all
sub["Pawpularity"] = pred_all / 5

In [13]:
# preview the first rows of the submission table
sub.head()

In [14]:
# write the submission file; index=False keeps the row index out of the CSV
sub.to_csv("submission.csv", index=False)