# Petfinder.my - Pawpularity Contest
Predict the popularity of shelter pet photos
<img src="https://storage.googleapis.com/kaggle-competitions/kaggle/25383/logos/header.png" alt="Petfinder.my - Pawpularity Contest header">

Analyze raw images and metadata to predict the “Pawpularity” of pet photos. The Pawpularity Score is derived from each pet profile's page view statistics at the listing pages, using an algorithm that normalizes the traffic data across different pages, platforms (web & mobile) and various metrics.

## Photo Metadata
The train.csv and test.csv files contain metadata for photos in the training set and test set, respectively. Each pet photo is labeled with the value of 1 (Yes) or 0 (No) for each of the following features:

- Focus - Pet stands out against uncluttered background, not too close / far.
- Eyes - Both eyes are facing front or near-front, with at least 1 eye / pupil decently clear.
- Face - Decently clear face, facing front or near-front.
- Near - Single pet taking up significant portion of photo (roughly over 50% of photo width or height).
- Action - Pet in the middle of an action (e.g., jumping).
- Accessory - Accompanying physical or digital accessory / prop (e.g. toy, digital sticker), excluding collar and leash.
- Group - More than 1 pet in the photo.
- Collage - Digitally-retouched photo (e.g. with digital photo frame, combination of multiple photos).
- Human - Human in the photo.
- Occlusion - Specific undesirable objects blocking part of the pet (e.g. human, cage or fence). Note that not all blocking objects are considered occlusion.
- Info - Custom-added text or labels (e.g. pet name, description).
- Blur - Noticeably out of focus or noisy, especially for the pet’s eyes and face. For Blur entries, the “Eyes” column is always set to 0.

# Importing Libraries

In [2]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [3]:
# Folders holding the training and test JPEGs. The trailing slash matters:
# file names are appended to these by plain string concatenation.
TRAIN_PATH, TEST_PATH = (
    "../input/petfinder-pawpularity-score/train/",
    "../input/petfinder-pawpularity-score/test/",
)

# Side length, in pixels, of the square images produced by the loaders.
IMAGE_SIZE = 224
# Number of samples per batch in the tf.data pipelines.
BATCH_SIZE = 100
# Upper bound on the number of training epochs.
NO_OF_EPOCHS = 100

In [4]:
# Load the per-photo metadata tables shipped with the competition.
train_df = pd.read_csv("../input/petfinder-pawpularity-score/train.csv")
test_df = pd.read_csv("../input/petfinder-pawpularity-score/test.csv")

# Give every row the full path of its image: <folder><Id>.jpg
for frame, folder in ((train_df, TRAIN_PATH), (test_df, TEST_PATH)):
    frame["Path"] = folder + frame["Id"] + ".jpg"
train_df.head()

In [5]:
# Preview the first rows of the test metadata (shown as notebook cell output).
test_df.head()

In [6]:
# Report how many rows each metadata table contains.
print("Training set instances:", len(train_df))
print("Testing set instances:", len(test_df))

# Creating keras Dataset

In [5]:
def generate_image_dataset(filepath, label=None):
    """Read one JPEG from disk and convert it into a model-ready tensor.

    Args:
        filepath: Path of the JPEG file to load.
        label: Optional target value; passed through unchanged.

    Returns:
        A ``(image, label)`` tuple. ``image`` is a float32 tensor with three
        channels, resized to ``IMAGE_SIZE x IMAGE_SIZE`` and scaled to [0, 1].
    """
    raw_bytes = tf.io.read_file(filepath)
    image = tf.image.decode_jpeg(raw_bytes, channels=3)
    # Fix: this result used to be bound to a misspelled name (`iamge`), so the
    # rescaling was silently dropped and pixels stayed in [0, 255].
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.image.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
    return image, label


def _to_batched_dataset(frame):
    """Build a batched, prefetched (image, Pawpularity) dataset from *frame*."""
    pairs = tf.data.Dataset.from_tensor_slices(
        (frame["Path"].values, frame["Pawpularity"].values)
    )
    return pairs.map(generate_image_dataset).batch(BATCH_SIZE).prefetch(1)


# Hold out 10% of the labelled rows for validation.
train, valid = train_test_split(train_df, test_size=0.1)
dataset_train = _to_batched_dataset(train)
dataset_valid = _to_batched_dataset(valid)

In [6]:
# Short alias for the long experimental preprocessing namespace.
_preprocessing = keras.layers.experimental.preprocessing

data_augmentation = keras.Sequential(
    [
        # NOTE(review): no adapt() call or explicit mean/variance for this
        # layer is visible in this file — confirm it normalizes as intended.
        _preprocessing.Normalization(),
        _preprocessing.Resizing(IMAGE_SIZE, IMAGE_SIZE),
        _preprocessing.RandomFlip("horizontal"),
        # `factor` is a fraction of 2*pi, so 0.02 rotates by a random angle
        # in the range [-2% * 2pi, +2% * 2pi].
        _preprocessing.RandomRotation(factor=0.02),
        # A single float is used as both bounds: each axis is zoomed by a
        # random amount in [-20%, +20%]. Positive values zoom out, negative
        # values zoom in.
        _preprocessing.RandomZoom(height_factor=0.2, width_factor=0.2),
    ],
    name="data_augmentaion",
)

In [None]:
def train_experimental_model(model,
                             optimizer,
                             checkpoint_name=None):
    """Compile *model* for regression and fit it on the module-level datasets.

    The model is trained on ``dataset_train`` and validated on
    ``dataset_valid`` for at most ``NO_OF_EPOCHS`` epochs. The weights with the
    lowest validation RMSE are saved to ``checkpoint_name``, and training stops
    early once the validation loss has not improved for 10 epochs.

    Args:
        model: Keras model to compile and train (modified in place).
        optimizer: Optimizer instance or identifier passed to ``compile``.
        checkpoint_name: File path where the best weights are written.
            Required despite the ``None`` default.

    Returns:
        The ``History`` object returned by ``model.fit``.

    Raises:
        ValueError: If ``checkpoint_name`` is ``None``.
    """
    # Validate first so a missing name fails before any compile work is done.
    if checkpoint_name is None:
        raise ValueError("Provide checpoint model name.")

    model.compile(
        optimizer=optimizer,
        loss=keras.losses.MeanSquaredError(),
        metrics=[
            keras.metrics.RootMeanSquaredError(name="rmse"),
            "mae",
            "mape",
        ],
    )

    checkpoint_callback = keras.callbacks.ModelCheckpoint(
        checkpoint_name,
        monitor="val_rmse",
        mode="min",  # lower RMSE is better; spelled out rather than inferred
        save_best_only=True,
        save_weights_only=True,
    )
    early_stopping_callback = keras.callbacks.EarlyStopping(
        monitor="val_loss",
        mode="min",
        patience=10,
        restore_best_weights=True,
        min_delta=1e-4,
    )

    # The datasets are already batched, so `batch_size` must not be passed to
    # fit(); the batch size is determined by the tf.data pipeline itself.
    history = model.fit(
        dataset_train,
        validation_data=dataset_valid,
        epochs=NO_OF_EPOCHS,
        callbacks=[checkpoint_callback, early_stopping_callback],
    )
    return history

# Train the ResNet with a default-configured Adam optimizer, checkpointing
# weights to "resnet_model.h5".
# NOTE: `resnet_model` is built in a cell that appears further down in this
# file; that cell has to be executed before this call.
history = train_experimental_model(
    model=resnet_model,
    optimizer=keras.optimizers.Adam(),
    checkpoint_name="resnet_model.h5",
)

# Let's train a ResNet-34 CNN

In [7]:
class ResidualUnit(keras.layers.Layer):
    """Residual block: two 3x3 convolutions plus a skip connection.

    The main path is Conv -> BatchNorm -> activation -> Conv -> BatchNorm.
    When ``strides > 1`` the skip path uses a strided 1x1 convolution followed
    by BatchNorm so its output matches the main path; otherwise the input is
    added unchanged.

    Args:
        filters: Number of output filters of every convolution in the unit.
        strides: Stride of the first main-path convolution (and of the skip
            convolution when one is created).
        activation: Activation name or callable, resolved through
            ``keras.activations.get``.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, filters, strides=1, activation="relu", **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.filters = filters
        self.strides = strides
        self.activation = keras.activations.get(activation)
        self.main_layers = [
            keras.layers.Conv2D(filters, 3, strides=strides,
                                padding="same", use_bias=False),
            keras.layers.BatchNormalization(),
            self.activation,
            keras.layers.Conv2D(filters, 3, strides=1, padding="same",
                                use_bias=False),
            keras.layers.BatchNormalization(),
        ]
        self.skip_layers = []
        if strides > 1:
            self.skip_layers = [
                keras.layers.Conv2D(filters, 1, strides=strides,
                                    padding="same", use_bias=False),
                keras.layers.BatchNormalization(),
            ]

    def get_config(self):
        """Return the constructor arguments in serializable form.

        The previous version returned the raw activation function and the
        internal layer lists under keys that ``__init__`` does not accept, so
        the config could neither be serialized nor used to rebuild the layer.
        """
        base_config = super().get_config()
        return {
            **base_config,
            "filters": self.filters,
            "strides": self.strides,
            "activation": keras.activations.serialize(self.activation),
        }

    def call(self, inputs):
        """Apply the main and skip paths, add them, then activate the sum."""
        Z = inputs
        for layer in self.main_layers:
            Z = layer(Z)
        skip_Z = inputs
        for layer in self.skip_layers:
            skip_Z = layer(skip_Z)
        return self.activation(Z + skip_Z)
    

# ResNet-34 style regressor: stem -> 16 residual units -> pooled linear head.
resnet_model = keras.models.Sequential()
resnet_model.add(keras.Input(shape=[IMAGE_SIZE, IMAGE_SIZE, 3]))
# Fix: augmentation has to act on the input images. It used to be added after
# the first convolution, where it normalized, resized, flipped, rotated and
# zoomed 64-channel feature maps instead of photos.
resnet_model.add(data_augmentation)
resnet_model.add(keras.layers.Conv2D(64, 7, strides=2,
                                     padding="same", use_bias=False))
resnet_model.add(keras.layers.BatchNormalization())
resnet_model.add(keras.layers.Activation("relu"))
resnet_model.add(keras.layers.MaxPool2D(pool_size=2, strides=2, padding="same"))

# Stack the residual stages; spatial size is halved (stride 2) whenever the
# filter count changes from one unit to the next.
prev_filters = 64
for filters in [64] * 3 + [128] * 4 + [256] * 6 + [512] * 3:
    strides = 1 if filters == prev_filters else 2
    resnet_model.add(ResidualUnit(filters, strides=strides))
    prev_filters = filters

resnet_model.add(keras.layers.GlobalAvgPool2D())
resnet_model.add(keras.layers.Flatten())
# Single linear output unit: the predicted Pawpularity score.
resnet_model.add(keras.layers.Dense(1))

# Submission 

In [11]:
def generate_test_dataset(filepath):
    """Load one test image using the same preprocessing as training.

    Args:
        filepath: Path of the JPEG file to load.

    Returns:
        A ``(image, 0)`` tuple; the ``0`` is a placeholder label, kept so the
        dataset elements have the same structure as before.
    """
    # Delegate to the training loader instead of keeping a copy-pasted version
    # (which carried the same dead `iamge` assignment), so inference inputs are
    # always preprocessed exactly like training inputs.
    image, _ = generate_image_dataset(filepath)
    return image, 0


# Batched, prefetched pipeline over the test image paths.
test_paths = tf.data.Dataset.from_tensor_slices(test_df["Path"].values)
dataset_test = (
    test_paths.map(generate_test_dataset).batch(BATCH_SIZE).prefetch(1)
)

# Flatten the model output into a 1-D vector of scores, one per test row.
predictions = resnet_model.predict(dataset_test).reshape(-1)
submission_df = test_df[["Id"]].assign(Pawpularity=predictions)
submission_df.to_csv("submission.csv", index=False)