# PetFinder (PyTorch + Albumentations) 🐶

> ## (A beginner/learner approach)
>  #### Task: Given images and raw data, predict the **Pawpularity**, i.e. how strongly a viewer is attracted to a pet and is therefore likely to adopt it.



## This notebook will cover :-
> #### 1. Data Preprocessing (How to deal with image classification data)
> #### 2. Preparing data for PyTorch utils (Datasets and Dataloaders etc.)
> #### 3. Image Augmentations using Albumentations (and how to incorporate with your training data)
> #### 4. Modeling and Validations (using PyTorch mainly)
> #### 5. Preparing data for submissions
> #### 6. Going deeper (more techniques such as stacking, or trying advanced techniques from other people's notebooks 😁)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

import torch
import torchvision
from torch.utils.data import DataLoader
import albumentations
from albumentations.pytorch import ToTensorV2
import cv2

## Getting Data

In [None]:
# Load the training and test tables from their CSV files.
train_df = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')
test_df = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')

In [None]:
# Preview the first few rows of the training table.
train_df.head()

In [None]:
# Print a summary of the training table (columns, dtypes, non-null counts).
train_df.info()

In [None]:
# Print the same summary for the test table.
test_df.info()

## Little preprocessing

In [None]:
def file_path(name, folder="../input/petfinder-pawpularity-score/train"):
    """Return the path of the JPEG image called ``name`` inside ``folder``.

    Args:
        name: Image identifier without extension; it is converted with
            ``str()`` so non-string ids are accepted.
        folder: Directory that holds the images. Defaults to the training
            image directory; pass another directory (e.g. the test images)
            to reuse this helper for other splits.

    Returns:
        ``folder`` joined with ``"<name>.jpg"``.
    """
    filename = str(name) + '.jpg'
    return os.path.join(folder, filename)

In [None]:
# Store each image's file path in a new 'image_path' column, derived from 'Id'.
train_df['image_path'] = train_df['Id'].apply(file_path)
train_df.head()

## Making a Pytorch dataset class

In [None]:
class pet_dataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs each image on disk with its target.

    Args:
        image_paths: Indexable sequence of image file paths.
        targets: Indexable sequence of targets, aligned with ``image_paths``.
        augmentations: Optional callable invoked as
            ``augmentations(image=...)`` whose result is indexed with
            ``"image"`` (for example an ``albumentations.Compose``).
            ``None`` (the default) skips augmentation.
    """

    def __init__(self, image_paths, targets, augmentations=None):
        self.image_paths = image_paths
        self.targets = targets
        self.augmentations = augmentations

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, optionally augment, and tensorise the sample at ``idx``.

        Returns:
            A dict with ``"image"`` (the image tensor with the channel axis
            moved to the front) and ``"target"`` (the target as a tensor).

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        target = self.targets[idx]
        path = self.image_paths[idx]

        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of inside cvtColor.
        image = cv2.imread(path)
        if image is None:
            raise FileNotFoundError(f"Could not read image file: {path}")

        # OpenCV decodes to BGR channel order; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]

        # Move the channel axis first: (H, W, C) -> (C, H, W).
        image = np.transpose(image, (2, 0, 1))
        return {
            "image": torch.tensor(image),
            "target": torch.tensor(target)
        }

## Albumentations (will try more operations too; still learning 😅)
### Various Operations tried
> * Shift, scale and rotate
> * Random Cropping
> * Centre Cropping
> * Horizontal Flip (avoiding Vertical Flip because it doesn't make sense for pet data)
> * RGB shift
> * Transforms such as Channel Shuffle, Inversion, Blur, ColorJitter
> * Can try more random stuff


In [None]:
# Training-time augmentation pipeline; the steps run in the order listed.
augmentations = albumentations.Compose(
    [
        # Rescale so the shorter side is 750 px, leaving room for 720 px crops.
        albumentations.SmallestMaxSize(max_size=750),
        # Mild geometric jitter; border_mode selects constant-colour padding.
        albumentations.ShiftScaleRotate(
            shift_limit=0.05,
            scale_limit=0.05,
            rotate_limit=20,
            p=0.5,
            border_mode=cv2.BORDER_CONSTANT,
        ),
        # Always crop to 720x720, choosing between a random and a centre crop.
        albumentations.OneOf(
            [
                albumentations.RandomCrop(height=720, width=720, p=0.7),
                albumentations.CenterCrop(height=720, width=720, p=0.3),
            ],
            p=1.0,
        ),
        albumentations.HorizontalFlip(p=0.5),
        # Colour perturbations.
        albumentations.RGBShift(
            r_shift_limit=10,
            g_shift_limit=10,
            b_shift_limit=10,
            p=0.5,
        ),
        albumentations.RandomBrightnessContrast(p=0.5),
        # Half of the time, apply either a light blur or colour jitter.
        albumentations.OneOf(
            [
                albumentations.Blur(blur_limit=3, p=0.5),
                albumentations.ColorJitter(p=0.5),
            ],
            p=0.5,
        ),
        # A quarter of the time, shuffle the channels or invert the image.
        albumentations.OneOf(
            [
                albumentations.ChannelShuffle(p=0.5),
                albumentations.InvertImg(p=0.5),
            ],
            p=0.25,
        ),
    ]
)

In [None]:
# Pull the path and label columns out as NumPy arrays, then wrap them
# (together with the augmentation pipeline) in the dataset.
images = np.array(train_df['image_path'])
targets = np.array(train_df['Pawpularity'])
train_dataset = pet_dataset(
    image_paths=images,
    targets=targets,
    augmentations=augmentations,
)

In [None]:
# Show the first 15 training images, unaugmented, on a 3x5 grid.
fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(20, 10))
plt.suptitle("Given Pictures", fontsize=16)

for i in range(15):
    # Read from disk (OpenCV gives BGR) and convert to RGB for display.
    image = cv2.cvtColor(cv2.imread(train_df['image_path'][i]), cv2.COLOR_BGR2RGB)

    # Row-major placement: picture i goes to row i // 5, column i % 5.
    x, y = divmod(i, 5)

    axes[x, y].imshow(image, cmap=plt.cm.bone)
    axes[x, y].axis('off')

In [None]:
# Show the first 15 dataset samples (augmentations applied) on a 3x5 grid.
fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(20, 10))
plt.suptitle("Augmented Pictures", fontsize=16)

for i in range(15):
    # Reorder the tensor's axes (0, 1, 2) -> (1, 2, 0) before plotting.
    image = train_dataset[i]['image'].permute(1, 2, 0)

    # Row-major placement: picture i goes to row i // 5, column i % 5.
    x, y = divmod(i, 5)

    axes[x, y].imshow(image, cmap=plt.cm.bone)
    axes[x, y].axis('off')

## Work under construction... (Model Training, validation, tuning, different augmentation techniques etc.)


