# Petfinder.my - Pawpularity Contest
Predict the popularity of shelter pet photos
<img src="https://storage.googleapis.com/kaggle-competitions/kaggle/25383/logos/header.png"></img>

Analyze raw images and metadata to predict the “Pawpularity” of pet photos. The Pawpularity Score is derived from each pet profile's page view statistics at the listing pages, using an algorithm that normalizes the traffic data across different pages, platforms (web & mobile) and various metrics.

## Photo Metadata
The train.csv and test.csv files contain metadata for photos in the training set and test set, respectively. Each pet photo is labeled with the value of 1 (Yes) or 0 (No) for each of the following features:

- Focus - Pet stands out against uncluttered background, not too close / far.
- Eyes - Both eyes are facing front or near-front, with at least 1 eye / pupil decently clear.
- Face - Decently clear face, facing front or near-front.
- Near - Single pet taking up significant portion of photo (roughly over 50% of photo width or height).
- Action - Pet in the middle of an action (e.g., jumping).
- Accessory - Accompanying physical or digital accessory / prop (e.g., toy, digital sticker), excluding collar and leash.
- Group - More than 1 pet in the photo.
- Collage - Digitally-retouched photo (e.g., with digital photo frame, combination of multiple photos).
- Human - Human in the photo.
- Occlusion - Specific undesirable objects blocking part of the pet (e.g., human, cage or fence). Note that not all blocking objects are considered occlusion.
- Info - Custom-added text or labels (e.g., pet name, description).
- Blur - Noticeably out of focus or noisy, especially for the pet’s eyes and face. For Blur entries, “Eyes” column is always set to 0.

# Importing Libraries

In [None]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [None]:
# Training hyper-parameters shared by every model in this notebook.
IMAGE_SIZE = 224      # images are resized to IMAGE_SIZE x IMAGE_SIZE pixels
BATCH_SIZE = 100      # number of samples per dataset batch
NO_OF_EPOCHS = 100    # upper bound on epochs passed to model.fit

# Directories holding the competition's JPEG images.
TEST_PATH = "../input/petfinder-pawpularity-score/test/"
TRAIN_PATH = "../input/petfinder-pawpularity-score/train/"

In [None]:
# Load the per-photo metadata tables.
train_df = pd.read_csv("../input/petfinder-pawpularity-score/train.csv")
test_df = pd.read_csv("../input/petfinder-pawpularity-score/test.csv")

# Attach the full image path to every row: <directory><Id>.jpg
for frame, image_dir in ((train_df, TRAIN_PATH), (test_df, TEST_PATH)):
    frame["Path"] = image_dir + frame["Id"] + ".jpg"

# Preview the first rows of the training table.
train_df.head()

In [None]:
# Preview the first rows of the test table (5 is the pandas default).
test_df.head(n=5)

In [None]:
# Report how many rows (photos) each split contains.
print("Training set instances:", len(train_df))
print("Testing set instances:", len(test_df))

# Creating tf.data Datasets

In [None]:
def generate_image_dataset(filepath, label=None):
    """Load one JPEG from disk and turn it into a model-ready tensor.

    Args:
        filepath: Path of the JPEG file to read.
        label: Optional target value passed through unchanged.

    Returns:
        A ``(image, label)`` tuple where ``image`` is a float32 RGB tensor
        scaled to [0, 1] and resized to ``IMAGE_SIZE x IMAGE_SIZE``.

    Note:
        Inference-time preprocessing must apply the same scaling as this
        function, otherwise predictions are made on differently-ranged input.
    """
    image = tf.io.read_file(filepath)
    image = tf.image.decode_jpeg(image, channels=3)
    # The scaled tensor was previously bound to a misspelled name ("iamge")
    # and silently discarded, so pixels stayed in the 0-255 range.
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.image.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
    return image, label


def _build_dataset(frame, shuffle=False):
    """Build a batched image/label dataset from a metadata DataFrame.

    Args:
        frame: DataFrame with "Path" and "Pawpularity" columns.
        shuffle: When True, the (path, label) pairs are reshuffled on every
            pass over the dataset. Keras ignores ``fit(shuffle=...)`` for
            dataset inputs, so shuffling has to happen here.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches of
        ``BATCH_SIZE`` samples.
    """
    dataset = tf.data.Dataset.from_tensor_slices(
        (frame["Path"].values, frame["Pawpularity"].values)
    )
    if shuffle:
        # Shuffle before decoding: the buffer then only holds cheap
        # (path, label) pairs rather than decoded images.
        dataset = dataset.shuffle(len(frame))
    return (
        # Decode several JPEGs in parallel; element order stays deterministic.
        dataset.map(generate_image_dataset,
                    num_parallel_calls=tf.data.AUTOTUNE)
        .batch(BATCH_SIZE)
        .prefetch(1)
    )


# Hold out 10% of the labelled photos for validation.
train, valid = train_test_split(train_df, test_size=0.1)
dataset_train = _build_dataset(train, shuffle=True)
dataset_valid = _build_dataset(valid)

In [None]:
# Augmentation pipeline: normalise, resize, then apply random flip / rotation /
# zoom. NOTE(review): no call to adapt() for the Normalization layer is visible
# in this notebook — confirm it is adapted before the pipeline is used.
_preprocessing = keras.layers.experimental.preprocessing
data_augmentation = keras.Sequential(
    layers=[
        _preprocessing.Normalization(),
        _preprocessing.Resizing(IMAGE_SIZE, IMAGE_SIZE),
        _preprocessing.RandomFlip("horizontal"),
        # A single float factor f rotates by a random amount in
        # [-f * 2pi, f * 2pi]; here that is [-2% * 2pi, 2% * 2pi].
        _preprocessing.RandomRotation(factor=0.02),
        # A single float factor f samples the zoom from [-f, +f]: positive
        # values zoom out, negative values zoom in. Height and width are each
        # zoomed by a random amount in [-20%, +20%].
        _preprocessing.RandomZoom(height_factor=0.2, width_factor=0.2),
    ],
    name="data_augmentaion",
)

In [None]:
def train_experimental_model(model, optimizer,
                             checkpoint_name=None):
    """Compile and fit ``model`` on the notebook's train/validation datasets.

    The model is trained as a regressor with mean-squared-error loss while
    tracking RMSE and MAE. The best weights (lowest ``val_rmse``) are written
    to ``checkpoint_name``; training stops early after 10 epochs without
    improvement of the EarlyStopping default metric (``val_loss``) and the
    best weights are restored.

    Args:
        model: Keras model to train.
        optimizer: Keras optimizer instance (or identifier) used for training.
        checkpoint_name: File path for the weights checkpoint. Required.

    Returns:
        The ``History`` object returned by ``model.fit``.

    Raises:
        ValueError: If ``checkpoint_name`` is not provided.
    """
    # Validate first so a missing name fails before the model is touched.
    if checkpoint_name is None:
        raise ValueError("Provide checpoint model name.")

    model.compile(
        optimizer=optimizer,
        loss=keras.losses.MeanSquaredError(),
        metrics=[
            keras.metrics.RootMeanSquaredError(name="rmse"),
            "mae",
        ]
    )
    checkpoint_callback = keras.callbacks.ModelCheckpoint(
        checkpoint_name,
        monitor="val_rmse",
        save_best_only=True,
        save_weights_only=True
    )
    early_stopping_callback = keras.callbacks.EarlyStopping(
        patience=10,
        restore_best_weights=True
    )

    # No batch_size here: the datasets are already batched, and Keras
    # documents that batch_size must not be specified for dataset inputs.
    history = model.fit(
        dataset_train,
        validation_data=dataset_valid,
        epochs=NO_OF_EPOCHS,
        callbacks=[checkpoint_callback, early_stopping_callback]
    )
    return history

# Let's train a ResNet-34 CNN

In [None]:
class ResidualUnit(keras.layers.Layer):
    """Residual block: two 3x3 conv + batch-norm stages with a skip connection.

    When ``strides > 1`` the skip path uses a strided 1x1 convolution followed
    by batch normalisation so that its output can be added to the main path;
    otherwise the input is added unchanged.

    Args:
        filters: Number of filters used by every convolution in the unit.
        strides: Stride of the first main-path convolution (and of the skip
            convolution when one is created).
        activation: Activation identifier resolved via
            ``keras.activations.get``.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, filters, strides=1, activation="relu", **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.filters = filters
        self.strides = strides
        self.activation = keras.activations.get(activation)
        self.main_layers = [
            keras.layers.Conv2D(filters, 3, strides=strides,
                                padding="same", use_bias=False),
            keras.layers.BatchNormalization(),
            self.activation,
            keras.layers.Conv2D(filters, 3, strides=1, padding="same",
                                use_bias=False),
            keras.layers.BatchNormalization()
        ]
        self.skip_layers = []
        if strides > 1:
            self.skip_layers = [
                keras.layers.Conv2D(filters, 1, strides=strides,
                                    padding="same", use_bias=False),
                keras.layers.BatchNormalization()
            ]

    def get_config(self):
        """Return the constructor arguments needed to rebuild this layer.

        Only serialisable constructor arguments are returned. Returning the
        sub-layer objects would make ``from_config`` pass unknown keyword
        arguments to ``__init__``.
        """
        base_config = super().get_config()
        return {
            **base_config,
            "filters": self.filters,
            "strides": self.strides,
            "activation": keras.activations.serialize(self.activation),
        }

    def call(self, inputs):
        """Apply the main path and the skip path, then activate their sum."""
        Z = inputs
        for layer in self.main_layers:
            Z = layer(Z)
        skip_Z = inputs
        for layer in self.skip_layers:
            skip_Z = layer(skip_Z)
        return self.activation(Z + skip_Z)
    

# ResNet-34 style regressor: stem (7x7 conv, batch norm, ReLU, max-pool),
# then 3 + 4 + 6 + 3 residual units, global average pooling and one linear
# output unit for the Pawpularity score.
resnet_model = keras.models.Sequential()
# The input shape follows IMAGE_SIZE so the model always matches the size the
# data pipeline resizes images to.
resnet_model.add(keras.layers.Conv2D(64, 7, strides=2,
                                     input_shape=[IMAGE_SIZE, IMAGE_SIZE, 3],
                                     padding="same", use_bias=False))
resnet_model.add(keras.layers.BatchNormalization())
resnet_model.add(keras.layers.Activation("relu"))
resnet_model.add(keras.layers.MaxPool2D(pool_size=2, strides=2, padding="same"))
prev_filters = 64
for filters in [64] * 3 + [128] * 4 + [256] * 6 + [512] * 3:
    # Downsample (stride 2) whenever the number of filters changes.
    strides = 1 if filters == prev_filters else 2
    resnet_model.add(ResidualUnit(filters, strides=strides))
    prev_filters = filters
# Global average pooling already yields a (batch, channels) tensor, so no
# Flatten layer is needed before the output unit.
resnet_model.add(keras.layers.GlobalAvgPool2D())
resnet_model.add(keras.layers.Dense(1))

In [None]:
# Train the ResNet with a default-configured Adam optimizer; the best weights
# are checkpointed to "resnet_model.h5".
history = train_experimental_model(
    resnet_model,
    optimizer=keras.optimizers.Adam(),
    checkpoint_name="resnet_model.h5",
)

# Let's give Vision-Transformer a try

In [None]:
learning_rate = 0.001
weight_decay = 0.0001
patch_size = 16         # P: size of the patches to be extracted from the input images
# N = HW / P^2; with H = W = S this becomes N = (S / P)^2.
# The divisor must be patch_size: dividing IMAGE_SIZE by itself always gave
# N = 1, so every patch shared a single position embedding.
num_patches = (IMAGE_SIZE // patch_size) ** 2

projection_dim = 64    # The dimension a patch will be projected into
num_heads = 4
transformer_units = [
    projection_dim * 2,
    projection_dim
]                             # Size of transformer layers
transformer_layers = 8        # Lx
mlp_head_units = [2048, 1024] # Size of the dense layers of the final regression head

In [None]:
class Patches(keras.layers.Layer):
    """Split a batch of images into non-overlapping square patches.

    Args:
        patch_size: Side length, in pixels, of each square patch.
        **kwargs: Forwarded to ``keras.layers.Layer`` (``name``, ``dtype``,
            ...). Accepting them lets the layer be rebuilt from
            ``get_config()``, which includes those base-layer entries.
    """

    def __init__(self, patch_size, **kwargs):
        super().__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, images):
        """Return the flattened patches of ``images``.

        Args:
            images: A 4-D image tensor.

        Returns:
            A tensor reshaped to ``[batch_size, -1, patch_dims]``, where
            ``patch_dims`` is the last dimension produced by
            ``tf.image.extract_patches``.
        """
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,                                    # A 4-D tensor
            sizes=[1, self.patch_size, self.patch_size, 1],   # The size of the extracted patches
            strides=[1, self.patch_size, self.patch_size, 1], # Stride equal to the patch size: patches do not overlap
            rates=[1, 1, 1, 1],                               # No dilation inside a patch
            padding="VALID"
        )
        patch_dims = patches.shape[-1]
        # Collapse the patch grid into a single sequence axis.
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

    def get_config(self):
        """Return the constructor arguments needed to rebuild this layer."""
        base_config = super().get_config()
        return {**base_config, "patch_size": self.patch_size}
    

def mlp(x, hidden_units, dropout_rate):
    """Stack Dense(GELU) + Dropout pairs on top of ``x``.

    Args:
        x: Input tensor.
        hidden_units: Iterable with the width of each Dense layer, in order.
        dropout_rate: Dropout rate applied after every Dense layer.

    Returns:
        The output of the last Dropout layer, or ``x`` itself when
        ``hidden_units`` is empty.
    """
    out = x
    for width in hidden_units:
        dense = keras.layers.Dense(width, activation=tf.nn.gelu)
        dropout = keras.layers.Dropout(dropout_rate)
        out = dropout(dense(out))
    return out

In [None]:
class PatchEncoder(keras.layers.Layer):
    """Linearly project patches and add a learned position embedding.

    Args:
        num_patches: Number of positions in the embedding table; positions
            ``0 .. num_patches - 1`` are looked up on every call.
        projection_dim: Output size of both the Dense projection and the
            position embedding.
        **kwargs: Forwarded to ``keras.layers.Layer`` so the layer can be
            rebuilt from ``get_config()``.
    """

    def __init__(self, num_patches, projection_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patches = num_patches
        # Kept so get_config() can report it.
        self.projection_dim = projection_dim
        self.projection = keras.layers.Dense(units=projection_dim)
        self.position_embedding = keras.layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        """Return the projected patches plus their position embeddings."""
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

    def get_config(self):
        """Return the constructor arguments needed to rebuild this layer.

        Only plain constructor arguments are returned. The Dense sub-layer
        object is neither serialisable nor accepted by ``__init__``.
        """
        base_config = super().get_config()
        return {**base_config, "num_patches": self.num_patches,
                "projection_dim": self.projection_dim}

In [None]:
def create_vit_regressor():
    """Build a Vision-Transformer style model with a single regression output.

    The graph is: input image -> Patches -> PatchEncoder ->
    ``transformer_layers`` pre-norm Transformer blocks (self-attention and an
    MLP, each wrapped in a skip connection) -> LayerNormalization -> Flatten
    -> Dropout -> MLP head -> Dense(1).

    Returns:
        An uncompiled ``keras.Model`` mapping
        ``(IMAGE_SIZE, IMAGE_SIZE, 3)`` images to one value.
    """
    image_input = keras.layers.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    # NOTE: the data_augmentation pipeline is not applied here; the raw
    # inputs are split into patches directly.
    patch_sequence = Patches(patch_size)(image_input)
    # Project each patch and add its positional embedding.
    tokens = PatchEncoder(num_patches, projection_dim)(patch_sequence)

    # Stack of Transformer blocks (Lx).
    for _block in range(transformer_layers):
        # Self-attention sub-block with a residual connection.
        normed_tokens = keras.layers.LayerNormalization(epsilon=1e-6)(tokens)
        attention = keras.layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )
        attended = attention(normed_tokens, normed_tokens)
        after_attention = keras.layers.Add()([attended, tokens])

        # Feed-forward sub-block with a residual connection.
        mlp_input = keras.layers.LayerNormalization(epsilon=1e-6)(after_attention)
        mlp_output = mlp(mlp_input, hidden_units=transformer_units,
                         dropout_rate=0.1)
        tokens = keras.layers.Add()([mlp_output, after_attention])

    # Normalise, then flatten the whole token sequence into one vector per image.
    head = keras.layers.LayerNormalization(epsilon=1e-6)(tokens)
    head = keras.layers.Flatten()(head)
    head = keras.layers.Dropout(0.5)(head)
    # Regression head.
    head = mlp(head, hidden_units=mlp_head_units, dropout_rate=0.5)
    prediction = keras.layers.Dense(units=1)(head)
    return keras.Model(inputs=image_input, outputs=prediction)

# Build the ViT regressor and train it. The learning rate declared with the
# other ViT hyper-parameters is passed explicitly so that changing it there
# actually affects training.
# NOTE(review): weight_decay is declared but still unused; plain Adam is kept.
vit_regressor = create_vit_regressor()
history = train_experimental_model(
    vit_regressor,
    optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
    checkpoint_name="vit_model.h5",
)

# Submission 

In [None]:
def generate_test_dataset(filepath):
    """Load one test image using the same preprocessing as training.

    Delegating to ``generate_image_dataset`` keeps inference preprocessing
    identical to training preprocessing. It also removes the duplicated code,
    which contained a dead assignment to a misspelled name ("iamge").

    Args:
        filepath: Path of the JPEG file to read.

    Returns:
        An ``(image, 0)`` tuple; the constant 0 is a placeholder label.
    """
    image, _ = generate_image_dataset(filepath)
    return image, 0


def generate_submission_csv(model):
    """Predict on the test images and write ``submission.csv``.

    Args:
        model: Trained model exposing ``predict``; its output is flattened
            to one value per test row.

    The file contains the test "Id" column plus a "Pawpularity" column with
    the predictions, written without the index.
    """
    test_paths = test_df["Path"].values
    dataset_test = tf.data.Dataset.from_tensor_slices(test_paths)
    dataset_test = dataset_test.map(generate_test_dataset)
    dataset_test = dataset_test.batch(BATCH_SIZE).prefetch(1)

    predictions = model.predict(dataset_test).reshape(-1)
    submission_df = test_df[["Id"]].assign(Pawpularity=predictions)
    submission_df.to_csv("submission.csv", index=False)
    
# Write the submission file from the ResNet model's predictions.
generate_submission_csv(model=resnet_model)