# PetFinder.my
- Hola amigos, this notebook covers my code for the **PetFinder.my - Pawpularity Contest**, which can be found [here](https://www.kaggle.com/c/petfinder-pawpularity-score).
- Reference Notebooks:
    - [[Pytorch + W&B] Pawpularity Training](https://www.kaggle.com/debarshichanda/pytorch-w-b-pawpularity-training?scriptVersionId=75559544)
    - [Experiment Tracking with Weights & Biases](https://www.kaggle.com/ayuraj/experiment-tracking-with-weights-and-biases/notebook)
    - [Interactive EDA using W&B Tables](https://www.kaggle.com/ayuraj/interactive-eda-using-w-b-tables)
    - [Continuous Target Stratification](https://www.kaggle.com/tolgadincer/continuous-target-stratification?scriptVersionId=52551118&cellId=6)

<br>

![](https://storage.googleapis.com/kaggle-media/competitions/Petfinder/PetFinder%20-%20Logo.png)

# Installing and Importing Packages

In [None]:
!pip install timm
!pip install --upgrade -q wandb

In [None]:
import os
import gc
import cv2
import copy
import time
import random
from PIL import Image

# Data Manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

# Util Imports
# https://docs.python.org/3/library/collections.html
# https://stackoverflow.com/questions/5900578/how-does-collections-defaultdict-work
import joblib 
from tqdm import tqdm
from collections import defaultdict

# Scikit-Learn Imports
from sklearn.metrics import mean_squared_error as MSE
from sklearn.model_selection import StratifiedKFold, KFold

# https://rwightman.github.io/pytorch-image-models/
import timm

# Albumentations for Augmentations
# https://albumentations.ai/docs/
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Setting `CUDA_LAUNCH_BLOCKING` makes CUDA report an error at the place where it actually occurs.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [None]:
# Configure Weights & Biases: read the API key stored under the Kaggle secret
# named "wandb_api" and log in with it.
import wandb
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
wandb_api = user_secrets.get_secret("wandb_api")
wandb.login(key = wandb_api)
# NOTE(review): `anony` is not read anywhere in the visible notebook (the
# wandb.init calls pass anonymous = 'must' directly) -- confirm whether it is
# still needed.
anony = None

# Training Configuration

In [None]:
# Competition data locations; the image folders live directly under the dataset root.
ROOT_DIR = "../input/petfinder-pawpularity-score"
TRAIN_DIR = f"{ROOT_DIR}/train"
TEST_DIR = f"{ROOT_DIR}/test"

In [None]:
# Every hyper-parameter of the experiment in one place. The same dict is handed
# to wandb.init as the run configuration.
CONFIG = {
    # Reproducibility / model
    'seed': 42,
    'model_name': 'tf_efficientnet_b4_ns',
    # Data loading
    'train_batch_size': 16,
    'valid_batch_size': 32,
    'img_size': 512,
    # Optimisation
    'epochs': 5,
    'learning_rate': 1e-4,
    'scheduler': 'CosineAnnealingLR',
    'min_lr': 1e-6,
    'T_max': 20,
    'T_0': 25,
    'warmup_epochs': 0,
    'weight_decay': 1e-6,
    'n_accumulate': 1,
    # Cross-validation / output head
    'n_fold': 5,
    'num_classes': 1,
    # Use the first GPU when one is available, otherwise fall back to the CPU
    'device': torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    # Bookkeeping for Weights & Biases
    'competition': 'PetFinder',
    '_wandb_kernel': 'ele',
}

# Set Seed for Reproducibility

In [None]:
# Seeds every random number generator used in the notebook so that runs are reproducible
def set_seed(seed = 42):
    """Seed NumPy, `random` and PyTorch (CPU + current CUDA device) with `seed`.

    Also exports the seed as PYTHONHASHSEED and switches cuDNN to its
    deterministic, non-benchmarking mode.
    """
    # Fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)

    for seed_fn in (np.random.seed, random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    cudnn = torch.backends.cudnn
    # Only use deterministic convolution algorithms
    cudnn.deterministic = True
    # Do not benchmark several convolution algorithms to pick the fastest one
    cudnn.benchmark = False
    
set_seed(CONFIG['seed'])

# Read the Data

In [None]:
def get_train_file_path(id):
    """Return the path of the training JPEG that belongs to the image `id`."""
    return "{}/{}.jpg".format(TRAIN_DIR, id)

In [None]:
# Load the training table and attach the on-disk location of every image
df = pd.read_csv(ROOT_DIR + "/train.csv")
df['file_path'] = df['Id'].map(get_train_file_path)
print(df.shape)

In [None]:
# The metadata feature columns are all columns except the identifier, the target and the image path
_NON_FEATURE_COLS = ('Id', 'Pawpularity', 'file_path')
feature_cols = list(filter(lambda col: col not in _NON_FEATURE_COLS, df.columns))
print(feature_cols)

# Visualization

In [None]:
# https://docs.wandb.ai/ref/python/init
# Creating a Weights & Biases Run for Visualization Purposes
run = wandb.init(project = 'PetFinder', config = CONFIG, 
    job_type = 'Visualization', anonymous = 'must')

In [None]:
# Columns of the W&B preview table: the dataframe columns without 'file_path'
# (the path is only used to load the picture), plus an 'Image' column in
# second position that holds the picture itself.
col_list = df.columns.tolist()
col_list.remove('file_path')
col_list[1:1] = ['Image']
print(col_list)

In [None]:
preview_table = wandb.Table(columns = col_list)

# Randomly sample up to 100 rows for the visualization (fewer if the table is smaller,
# since DataFrame.sample raises when asked for more rows than exist)
n_preview = min(100, df.shape[0])
df_temp = df.sample(n_preview, random_state = CONFIG['seed']).reset_index(drop = True)

for i in tqdm(range(df_temp.shape[0])):
    data = df_temp.iloc[i]
    img = Image.open(data.file_path)
    # The row is label-indexed by column name, so positions must be addressed through
    # .iloc: first value, then the image, then everything up to (excluding) the last
    # column. This lines up with col_list as long as 'file_path' is the last column.
    preview_table.add_data(data.iloc[0], wandb.Image(img), *data.iloc[1:-1])

wandb.log({'Visualization': preview_table})
run.finish()

In [None]:
# This is just to display the W&B run page in this interactive session.
from IPython import display

# We create an IFrame and set the width & height
iframe = display.IFrame(run.url, width = 1080, height = 720)
iframe

# Creating Folds

In [None]:
# https://pandas.pydata.org/docs/reference/api/pandas.cut.html
# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html

def create_folds(df, n_splits = 5, n_groups = None):
    """Assign a validation fold number to every row of `df`.

    The fold number is written to a new 'kfold' column; `df` is modified in
    place and also returned.

    Args:
        df: dataframe with a 'Pawpularity' column.
        n_splits: number of folds.
        n_groups: when None, rows are split with a plain shuffled KFold.
            Otherwise 'Pawpularity' is cut into `n_groups` equal-width bins and
            the folds are stratified on the bin index, so every fold sees a
            similar target distribution.

    Returns:
        The same dataframe, with 'kfold' holding values in [0, n_splits).
    """
    # -1 marks "not assigned yet"; unlike 1 it can never be confused with a real fold
    df['kfold'] = -1

    if n_groups is None:
        # Plain K-fold. shuffle = True is required for random_state to be accepted:
        # scikit-learn raises a ValueError when random_state is set without shuffling.
        fold = KFold(n_splits = n_splits, shuffle = True, random_state = CONFIG['seed'])
        target = df['Pawpularity']
    else:
        # Continuous target: bin it first, which gives a classification-like label
        # that StratifiedKFold can stratify on.
        fold = StratifiedKFold(n_splits = n_splits, shuffle = True, random_state = CONFIG['seed'])
        target = pd.cut(df['Pawpularity'], n_groups, labels = False)

    # split() yields positional indices, so write through .iloc; this stays correct
    # even when df does not carry a default 0..n-1 index.
    kfold_pos = df.columns.get_loc('kfold')
    for fold_no, (_, val_indices) in enumerate(fold.split(df, target)):
        df.iloc[val_indices, kfold_pos] = fold_no

    return df

In [None]:
df = create_folds(df, n_splits = CONFIG['n_fold'], n_groups = 14)
df.head()

# Creating the Dataset Class

In [None]:
# cv2.imread returns the image in BGR channel order;
# cv2.cvtColor is used to convert it from BGR to RGB.
class PawpularityDataset(Dataset):
    """Dataset yielding (image, metadata features, Pawpularity target) triples.

    Args:
        root_dir: stored on the instance but not used for loading; image paths
            are taken from the 'file_path' column of `df`.
        df: dataframe with 'file_path', 'Pawpularity' and the columns listed in
            the module-level `feature_cols`.
        transforms: optional callable invoked as `transforms(image = img)` whose
            result is indexed with "image" (e.g. an Albumentations pipeline).
    """

    def __init__(self, root_dir, df, transforms = None):
        self.root_dir = root_dir
        self.df = df
        self.file_names = df['file_path'].values
        self.targets = df['Pawpularity'].values
        self.meta = df[feature_cols].values
        self.transforms = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        img_path = self.file_names[index]
        img = cv2.imread(img_path)
        # imread signals failure by returning None instead of raising; fail here with
        # the offending path rather than with an opaque error inside cvtColor.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        meta = self.meta[index, : ]
        target = self.targets[index]

        if self.transforms is not None:
            img = self.transforms(image = img)["image"]

        return img, meta, target

# Defining the Augmentations

In [None]:
# https://albumentations.ai/docs/
# Both pipelines resize to a square of side CONFIG['img_size'], apply A.Normalize with
# its default arguments and convert to a tensor. Only the training pipeline adds a
# random horizontal flip.
data_transforms = dict(
    train = A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.HorizontalFlip(p = 0.5),
        A.Normalize(),
        ToTensorV2(),
    ]),
    val = A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.Normalize(),
        ToTensorV2(),
    ]),
)

# Defining the Model Architecture

In [None]:
class PawpularityModel(nn.Module):
    """timm backbone whose pooled features are concatenated with tabular metadata.

    Args:
        model_name: name passed to `timm.create_model`. The backbone must expose
            its head as `.classifier` (its `in_features` gives the embedding size).
        pretrained: forwarded to `timm.create_model`.
        n_meta_features: number of metadata columns appended to the image
            embedding before the final linear layer. Defaults to 12, the value
            that used to be hard-coded.
    """

    def __init__(self, model_name, pretrained = True, n_meta_features = 12):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained = pretrained)
        self.n_features = self.model.classifier.in_features
        # Drop the backbone's own classification head; the model then returns features
        self.model.reset_classifier(0)
        self.fc = nn.Linear(self.n_features + n_meta_features, CONFIG['num_classes'])
        self.dropout = nn.Dropout(p = 0.3)

    def forward(self, images, meta):
        # features.shape = (batch_size, num_embeddings)
        features = self.model(images)
        # Dropout is applied to the image embedding only, not to the metadata
        features = self.dropout(features)

        # features.shape = (batch_size, num_embeddings + n_meta_features)
        features = torch.cat([features, meta], dim = 1)

        # output.shape = (batch_size, num_classes)
        output = self.fc(features)
        return output
    
model = PawpularityModel(CONFIG['model_name'])
model.to(CONFIG['device'])

# Defining the Loss Function

In [None]:
def criterion(outputs, targets, eps = 1e-8):
    """Root-mean-squared-error loss between `outputs` and `targets`.

    Both tensors are flattened before comparison. `eps` is added under the
    square root: the derivative of sqrt is infinite at 0, so without it a
    batch that is predicted exactly produces NaN gradients.
    """
    mse = nn.functional.mse_loss(outputs.view(-1), targets.view(-1))
    return torch.sqrt(mse + eps)

# Training Function

In [None]:
def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    """Train `model` for one pass over `dataloader` with mixed precision.

    Args:
        model: called as `model(images, meta)`.
        optimizer: optimizer stepped every CONFIG['n_accumulate'] batches.
        scheduler: LR scheduler stepped after every optimizer step, or None.
        dataloader: yields (images, meta, targets) batches.
        device: device the batch tensors are moved to.
        epoch: epoch number, only shown in the progress bar.

    Returns:
        The sample-weighted mean of the per-batch loss over the epoch
        (0.0 if the dataloader yields no batches).
    """
    model.train()

    # GradScaler scales the loss so that small gradient values do not flush to zero
    # in half precision.
    # https://pytorch.org/docs/stable/amp.html#gradient-scaling
    scaler = amp.GradScaler()

    n_accumulate = CONFIG['n_accumulate']
    n_steps = len(dataloader)

    dataset_size = 0
    running_loss = 0.0
    epoch_loss = 0.0

    # Start from clean gradients so nothing left over from earlier work is applied
    optimizer.zero_grad()

    # Defining the Iterator for TQDM
    bar = tqdm(enumerate(dataloader), total = n_steps)
    for step, (images, meta, targets) in bar:
        images = images.to(device, dtype = torch.float)
        meta = meta.to(device, dtype = torch.float)
        targets = targets.to(device, dtype = torch.float)

        # Defining the Batch Size
        batch_size = images.size(0)

        # Enabling Autocast for Automatic Mixed Precision
        # https://developer.nvidia.com/automatic-mixed-precision
        with amp.autocast(enabled = True):
            outputs = model(images, meta)
            loss = criterion(outputs, targets)

        # Gradient accumulation: backward runs on every batch, but the optimizer only
        # steps every `n_accumulate` batches. The loss is divided so the accumulated
        # gradient is an average over those batches rather than a sum.
        scaler.scale(loss / n_accumulate).backward()

        # Also step on the final batch so a trailing, incomplete accumulation group is
        # applied instead of leaking into the next epoch.
        if (step + 1) % n_accumulate == 0 or (step + 1) == n_steps:
            scaler.step(optimizer)
            scaler.update()

            # Zero out the Parameter Gradients
            optimizer.zero_grad()

            if scheduler is not None:
                scheduler.step()

        # Track the undivided loss so the reported value does not depend on n_accumulate
        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size
        epoch_loss = running_loss / dataset_size

        # set_postfix shows the running statistics next to the progress bar
        # https://github.com/tqdm/tqdm
        bar.set_postfix(Epoch = epoch, Train_Loss = epoch_loss, LR = optimizer.param_groups[0]['lr'])

    # Run a garbage-collection pass to release objects that are no longer referenced
    gc.collect()

    return epoch_loss

# Validation Function

In [None]:
# The model must not be trained while iterating over the validation set, so gradient
# calculation is disabled for the whole function.
# https://pytorch.org/docs/stable/generated/torch.no_grad.html
@torch.no_grad()
def val_one_epoch(model, dataloader, device, epoch):
    """Evaluate `model` on `dataloader` without computing gradients.

    Args:
        model: called as `model(images, meta)`.
        dataloader: yields (images, meta, targets) batches.
        device: device the batch tensors are moved to.
        epoch: epoch number, only shown in the progress bar.

    Returns:
        (epoch_loss, val_rmse): the sample-weighted mean of the per-batch loss,
        and the RMSE computed once over all predictions of the epoch.
    """
    model.eval()

    dataset_size = 0
    running_loss = 0.0

    TARGETS = []
    PREDS = []

    # Defining the Iterator for TQDM
    bar = tqdm(enumerate(dataloader), total = len(dataloader))
    for step, (images, meta, targets) in bar:
        images = images.to(device, dtype = torch.float)
        meta = meta.to(device, dtype = torch.float)
        targets = targets.to(device, dtype = torch.float)

        # Defining the Batch Size
        batch_size = images.size(0)

        outputs = model(images, meta)
        loss = criterion(outputs, targets)

        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size
        epoch_loss = running_loss / dataset_size

        PREDS.append(outputs.view(-1).cpu().detach().numpy())
        TARGETS.append(targets.view(-1).cpu().detach().numpy())

        # The learning rate is not shown here: this function receives no optimizer,
        # and reading one from the global scope would tie it to notebook state.
        # https://github.com/tqdm/tqdm
        bar.set_postfix(Epoch = epoch, Val_Loss = epoch_loss)

    TARGETS = np.concatenate(TARGETS)
    PREDS = np.concatenate(PREDS)
    # Take the square root explicitly instead of passing `squared = False`, which
    # newer scikit-learn releases no longer accept.
    val_rmse = np.sqrt(MSE(TARGETS, PREDS))

    # Run a garbage-collection pass to release objects that are no longer referenced
    gc.collect()

    return epoch_loss, val_rmse

# Run Training

In [None]:
# train_loader, val_loader and the active W&B `run` are module-level objects that must
# be initialized before this function is called
def run_training(model, optimizer, scheduler, device, num_epochs):
    """Train for `num_epochs` epochs and keep the weights with the best validation RMSE.

    After every epoch the metrics are logged to W&B. Whenever the validation
    RMSE does not get worse, the weights are saved to
    'RMSE<rmse>_epoch<epoch>.bin' and uploaded with wandb.save.

    Args:
        model: model to train; the best weights are loaded into it before returning.
        optimizer: optimizer passed to train_one_epoch.
        scheduler: LR scheduler passed to train_one_epoch, or None.
        device: device used for both training and validation.
        num_epochs: number of epochs to run.

    Returns:
        (model, history): the model carrying the best weights and a dict with the
        per-epoch lists 'Train Loss', 'Valid Loss' and 'Valid RMSE'.
    """
    # To automatically log gradients
    # https://docs.wandb.ai/ref/python/watch
    wandb.watch(model, log_freq = 100)

    if torch.cuda.is_available():
        print('[INFO] Using GPU: {}\n'.format(torch.cuda.get_device_name()))

    start = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_epoch_rmse = np.inf
    history = defaultdict(list)

    for epoch in range(1, num_epochs+1):
        gc.collect()
        # Use the `device` argument (it used to be ignored in favour of CONFIG['device'])
        train_epoch_loss = train_one_epoch(model, optimizer, scheduler, dataloader = train_loader,
            device = device, epoch = epoch)
        val_epoch_loss, val_epoch_rmse = val_one_epoch(model, dataloader = val_loader,
            device = device, epoch = epoch)

        history['Train Loss'].append(train_epoch_loss)
        history['Valid Loss'].append(val_epoch_loss)
        history['Valid RMSE'].append(val_epoch_rmse)

        # Log all metrics in ONE call: every wandb.log call advances the W&B step, so
        # separate calls would place the three metrics of an epoch on different steps.
        wandb.log({
            "Train Loss": train_epoch_loss,
            "Val Loss": val_epoch_loss,
            "Valid RMSE": val_epoch_rmse,
        })

        print("Val RMSE:", val_epoch_rmse)

        # Deep Copy the Model
        if val_epoch_rmse <= best_epoch_rmse:
            print(f"Validation RMSE Improved ({best_epoch_rmse} - {val_epoch_rmse})")
            best_epoch_rmse = val_epoch_rmse
            run.summary["Best RMSE"] = best_epoch_rmse
            best_model_wts = copy.deepcopy(model.state_dict())
            PATH = "RMSE{:.4f}_epoch{:.0f}.bin".format(best_epoch_rmse, epoch)
            torch.save(model.state_dict(), PATH)

            # Upload the checkpoint file from the current directory to W&B
            wandb.save(PATH)
            print("Model Saved")

        print()

    end = time.time()
    time_elapsed = end - start
    print("Training complete in {:.0f}h {:.0f}m {:.0f}s".format(
        time_elapsed // 3600, (time_elapsed % 3600) // 60, (time_elapsed % 3600) % 60))
    print("Best RMSE: {:.4f}".format(best_epoch_rmse))

    # Load the best model weights
    model.load_state_dict(best_model_wts)

    return model, history

In [None]:
def prepare_loaders(fold):
    """Build the training and validation DataLoaders for one cross-validation fold.

    Rows of the module-level `df` whose 'kfold' equals `fold` form the
    validation set; all other rows form the training set.

    Args:
        fold: fold number to hold out for validation.

    Returns:
        (train_loader, val_loader)
    """
    df_train = df[df.kfold != fold].reset_index(drop = True)
    # The held-out fold. This used to select `!= fold` as well, which made the
    # validation set identical to the training set.
    df_val = df[df.kfold == fold].reset_index(drop = True)

    train_dataset = PawpularityDataset(TRAIN_DIR, df_train, transforms = data_transforms['train'])
    # Validation uses the augmentation-free pipeline
    val_dataset = PawpularityDataset(TRAIN_DIR, df_val, transforms = data_transforms['val'])

    train_loader = DataLoader(train_dataset, batch_size = CONFIG['train_batch_size'],
        num_workers = 4, shuffle = True, pin_memory = True, drop_last = True)
    # No shuffling and no dropped batch: every validation sample must be scored once
    val_loader = DataLoader(val_dataset, batch_size = CONFIG['valid_batch_size'],
        num_workers = 4, shuffle = False, pin_memory = True, drop_last = False)

    return train_loader, val_loader

In [None]:
def fetch_scheduler(optimizer):
    """Create the LR scheduler selected by CONFIG['scheduler'] for `optimizer`.

    Returns:
        A CosineAnnealingLR or CosineAnnealingWarmRestarts instance, or None
        when CONFIG['scheduler'] is None.

    Raises:
        ValueError: if CONFIG['scheduler'] names an unsupported scheduler.
    """
    name = CONFIG['scheduler']

    if name is None:
        return None

    if name == 'CosineAnnealingLR':
        return lr_scheduler.CosineAnnealingLR(optimizer, T_max = CONFIG['T_max'],
            eta_min = CONFIG['min_lr'])

    if name == 'CosineAnnealingWarmRestarts':
        return lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0 = CONFIG['T_0'],
            eta_min = CONFIG['min_lr'])

    # An unknown name used to fall through to `return scheduler` and fail with an
    # UnboundLocalError; report the real problem instead.
    raise ValueError(f"Unsupported scheduler: {name!r}")

# Training & Inferencing

In [None]:
# Create Dataloaders
train_loader, val_loader = prepare_loaders(fold = 0)

In [None]:
# Define Optimizer & Scheduler
optimizer = optim.Adam(model.parameters(), lr = CONFIG['learning_rate'], 
    weight_decay = CONFIG['weight_decay'])
scheduler = fetch_scheduler(optimizer)

In [None]:
# Creating the training run for Weights & Biases
run = wandb.init(project = 'PetFinder', config = CONFIG, job_type = 'Train', anonymous = 'must')

In [None]:
# Start Training
model, history = run_training(model, optimizer, scheduler, CONFIG['device'], CONFIG['epochs'])

In [None]:
# Finishing the Run
run.finish()

# Visualizations

In [None]:
# This is just to display the W&B run page in this interactive session.
from IPython import display

# We create an IFrame and set the width and height
iframe = display.IFrame(run.url, width = 1000, height = 720)
iframe