# Turtle Recall
A facial recognition model for turtles

https://zindi.africa/competitions/turtle-recall-conservation-challenge/data

In [None]:
!pip install tensorflow-addons

In [None]:
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_hub as hub
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import requests
import io
import urllib.parse
import tqdm
import datetime
from PIL import Image

In [None]:
# Record the TensorFlow version this notebook was executed with.
print(f'TensorFlow version is {tf.__version__}')

In [None]:
# List the GPUs TensorFlow can see and report how many there are.
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs:", len(physical_devices))

In [None]:
# Load the TensorBoard notebook extension so %tensorboard can be used below.
%load_ext tensorboard

# Dataset

First, we load the data. In addition to the turtles and images from the train.csv file, we also make use of extra_images.csv by concatenating it with the train file. This yields substantially more (ca. 10,000) image files to later train the model on.

In [None]:
SOURCE_URL = 'https://storage.googleapis.com/dm-turtle-recall/images.tar'
IMAGE_DIR = './data/images'
TAR_PATH = os.path.join(IMAGE_DIR, os.path.basename(SOURCE_URL))
EXPECTED_IMAGE_COUNT = 13891

%sx mkdir --parents "{IMAGE_DIR}"
if len(os.listdir(IMAGE_DIR)) != EXPECTED_IMAGE_COUNT:
  %sx wget --no-check-certificate -O "{TAR_PATH}" "{SOURCE_URL}"
  %sx tar --extract --file="{TAR_PATH}" --directory="{IMAGE_DIR}"
  %sx rm "{TAR_PATH}"

print(f'The total number of images is: {len(os.listdir(IMAGE_DIR))}')

In [None]:
BASE_URL = 'https://storage.googleapis.com/dm-turtle-recall/'

def read_csv_from_web(file_name, timeout=30):
  """Download a CSV file located under BASE_URL and parse it with pandas.

  Args:
    file_name: Name of the file, resolved relative to BASE_URL.
    timeout: Seconds to wait for the server before giving up.

  Returns:
    A DataFrame holding the parsed file.

  Raises:
    Whatever `raise_for_status()` raises when the server answers with an HTTP
    error, so that an error page is never parsed as if it were CSV data.
  """
  url = urllib.parse.urljoin(BASE_URL, file_name)
  response = requests.get(url, timeout=timeout)
  response.raise_for_status()
  return pd.read_csv(io.StringIO(response.content.decode('utf-8')))


# Read in csv files.
train = read_csv_from_web('train.csv')
extra_images = read_csv_from_web('extra_images.csv')

# Convert image_location strings to lowercase (only train.csv is normalised
# here) and check that no unexpected location value is present.
train['image_location'] = train['image_location'].str.lower()
assert set(train.image_location.unique()) == set(['left', 'right', 'top'])

# ignore_index gives every row of the combined frame its own label; otherwise
# the row labels of both files would overlap.
df = pd.concat(objs=[train, extra_images], ignore_index=True)

In [None]:
# Number of images available for each turtle. The Series method is used
# because the top-level pd.value_counts() function is deprecated.
images_per_turtle = df.turtle_id.value_counts()
print(f'The total number of turtles is {len(df.turtle_id.unique())}.\n'
      'The mean number of training images per turtle is '
      f'{round(np.mean(images_per_turtle), 2)}, '
      f'and the median is {int(np.median(images_per_turtle))}.')

As we can see, however, we don't get a lot of images per turtle on average. Actually, some 2000 turtles are represented with fewer than 10 images in the dataset, which leads to a huge imbalance. Hence, we decide not to make use of any turtle with fewer than `MIN_NR_IMGS` images.

In [None]:
# Turtles with fewer images than this are removed from the data.
MIN_NR_IMGS = 10

im_per_turtle = images_per_turtle[images_per_turtle >= MIN_NR_IMGS].to_frame()
# drop=True discards the old row labels instead of storing them in an extra
# 'index' column. Without it, running this cell a second time fails because
# that column already exists.
df = df[df.turtle_id.isin(im_per_turtle.index)].reset_index(drop=True)

In [None]:
# Recompute the per-turtle image counts on the filtered data. The Series
# method is used because the top-level pd.value_counts() is deprecated.
images_per_turtle = df.turtle_id.value_counts()
print(f'The total number of turtles after removal is {len(df.turtle_id.unique())}.\n'
      'The mean number of training images per turtle is now '
      f'{round(np.mean(images_per_turtle), 2)}, '
      f'and the median is {int(np.median(images_per_turtle))}. \n'
      f'The smallest number of images per turtle is '
      f'{images_per_turtle.min()}.')

In [None]:
# Show the (rows, columns) shape of the filtered frame and its first rows.
print(df.shape)
df.head(3)

We have now removed a significant portion of the data and are left with about 5000 images, which is still more than double the number of images initially listed in `train.csv`. There is, however, still a huge imbalance in the dataset, and the total number of files is quite small.

In [None]:
# Histogram of how many images each remaining turtle has.
fig, ax = plt.subplots()
ax.hist(x=images_per_turtle, rwidth=0.9, bins=20)
ax.set_xlabel('Images per train turtle')
plt.show()

# Preprocessing

We create mappings and get the paths to the image files. After that follows some basic and some advanced preprocessing.

In [None]:
# Class names: every turtle in the data in sorted order, followed by a
# 'new_turtle' entry at the end.
turtle_ids = sorted(np.unique(df.turtle_id)) + ['new_turtle']

# Keep only the files whose stem (the text before the first '.') is an
# image_id listed in df. A set makes each membership test a constant-time
# lookup instead of a scan over the whole column, and sorting the directory
# listing makes the file order independent of the file system.
known_image_ids = set(df.image_id)
image_files = [os.path.join(IMAGE_DIR, f) for f in sorted(os.listdir(IMAGE_DIR))
               if f.split('.')[0] in known_image_ids]
image_ids = [os.path.basename(f).split('.')[0] for f in image_files]

# image_id -> turtle_id and turtle_id -> integer class index.
image_to_turtle = dict(zip(df.image_id, df.turtle_id))
labels = dict(zip(turtle_ids, np.arange(len(turtle_ids))))

# Class index of every file, in the same order as image_files.
loaded_labels = [labels[image_to_turtle[image_id]] for image_id in image_ids]

In [None]:
# Number of output classes: one per entry in turtle_ids.
NUM_CLASSES = len(turtle_ids)

In [None]:
def crop_and_resize(pil_img, img_size=(224,224)):
  """Return the largest centred square of `pil_img`, resized to `img_size`.

  Args:
    pil_img: Image object exposing `size` as (width, height), `crop` and
      `resize`.
    img_size: Target size handed to `resize`.

  Returns:
    The result of resizing the cropped square.
  """
  width, height = pil_img.size
  side = min(width, height)

  # Box is (left, upper, right, lower), centred on the image.
  left = (width - side) // 2
  upper = (height - side) // 2
  box = (left, upper, (width + side) // 2, (height + side) // 2)

  return pil_img.crop(box).resize(img_size)

def _load_image(path):
  """Open `path`, force three RGB channels and return the default-size square crop.

  The context manager closes the file as soon as the pixels have been copied.
  Converting to RGB guarantees every image has the same number of channels,
  which stacking the images into a single tensor requires.
  """
  with Image.open(path) as img:
    return crop_and_resize(img.convert('RGB'))

# Clears tqdm's record of existing progress bars before a new one is created
# (presumably to avoid leftover bars when the cell is re-run).
tqdm.tqdm._instances.clear()
loaded_images = [_load_image(f) for f in tqdm.tqdm(image_files)]

In [None]:
# Inspect the first image: its (width, height) after resizing, the number of
# loaded images, and the picture itself as the cell output.
print(loaded_images[0].size)
print(len(loaded_images))
loaded_images[0]

In [None]:
# Convert every PIL image to a float32 tensor and stack them into one tensor.
ims = tf.stack([tf.convert_to_tensor(np.asarray(im), dtype=tf.float32) for im in loaded_images])
# Stack the integer class indices the same way. Note that `labels` held the
# turtle_id -> index dict until now and is rebound to a tensor here.
labels = tf.stack(loaded_labels)

In [None]:
def _scale_and_encode(image, label):
  """Divide the pixel values by 255 and one-hot encode the integer label."""
  return image / 255., tf.one_hot(label, NUM_CLASSES)


train_ds = tf.data.Dataset.from_tensor_slices((ims, labels)).map(_scale_and_encode)

print(f'The dataset contains {train_ds.cardinality().numpy()} images.')

# Data augmentation

Before applying augmentation to our images and hence increasing the size of our training data, we shuffle the current dataset, take a few images and store them in a test set for eventually evaluating our model. We do this to preserve the real-world data we want our model to work on later. The augmentation is then only used on training and validation data to make sure our model learns with a variety of different images and is robust against noise, different colour and brightness values, etc.

In [None]:
# Shuffle buffer spanning the whole dataset, so the shuffle is uniform.
BUFFER = train_ds.cardinality().numpy()
# Number of images held out for the final evaluation.
TEST_SPLIT = 1000
# Fixed seed so the same shuffle, and therefore the same held-out images, is
# produced on every run.
SPLIT_SEED = 42

# reshuffle_each_iteration=False keeps the order fixed across iterations, so
# take() and skip() below never overlap.
train_ds = train_ds.shuffle(buffer_size=BUFFER, seed=SPLIT_SEED, reshuffle_each_iteration=False)
test_ds, train_ds = train_ds.take(TEST_SPLIT), train_ds.skip(TEST_SPLIT)

print(
    f'Train images: {train_ds.cardinality().numpy()}', 
    f'Test images: {test_ds.cardinality().numpy()}', 
    sep='\n')

### Augmentation functions

In [None]:
def rotate_images(ds):
    """
    Build rotated copies of every image in `ds`.

    Returns a dataset holding the 90, 180 and 270 degree rotations, in that
    order. The unrotated images are not part of the result, so it contains
    three times as many elements as `ds`. Labels are passed through unchanged.
    """

    def _rotated(half_turns):
        # The angle is given as a multiple of pi.
        return ds.map(lambda x, y: (tfa.image.rotate(x, angles=half_turns * np.pi), y))

    rotated = _rotated(0.5)
    for half_turns in (1.0, 1.5):
        rotated = rotated.concatenate(_rotated(half_turns))

    return rotated

In [None]:
def apply_gaussian_filter(ds, filter_shape=7, sigma=2):
    """
    Blur every image in `ds` with a Gaussian filter.

    Returns a dataset of blurred images with the same number of elements as
    `ds`; the original images are not included. Labels are passed through
    unchanged. `filter_shape` and `sigma` are forwarded to
    tfa.image.gaussian_filter2d.
    """

    def _blur(image, label):
        blurred = tfa.image.gaussian_filter2d(image, filter_shape=filter_shape, sigma=sigma)
        return blurred, label

    return ds.map(_blur)

In [None]:
def random_hsv(ds):
    """
    Randomly adjust hue, saturation and value of every RGB image in `ds`,
    working in the YIQ color space.

    Returns a dataset with the same number of elements as `ds`; labels are
    passed through unchanged.
    """

    # Ranges forwarded to tfa.image.random_hsv_in_yiq.
    jitter = dict(max_delta_hue=0.8,
                  lower_saturation=0.2, upper_saturation=0.8,
                  lower_value=0.2, upper_value=0.8)

    def _jitter_colours(image, label):
        return tfa.image.random_hsv_in_yiq(image, **jitter), label

    return ds.map(_jitter_colours)


In [None]:
def add_noise(ds, sd=0.2):
    """
    Add zero-mean Gaussian noise to every image and clip the result to [0, 1].

    Args:
        ds: Dataset of (image, label) pairs.
        sd: Standard deviation of the noise.

    Returns:
        A dataset with the same number of elements as `ds`; labels are passed
        through unchanged.
    """

    def _noisy(image, label):
        # tf.shape() is resolved when the pipeline runs, so this also works
        # for elements whose static shape is not fully known.
        noise = tf.random.normal(tf.shape(image), mean=0.0, stddev=sd, dtype=tf.float32)
        return tf.clip_by_value(image + noise, 0.0, 1.0), label

    return ds.map(_noisy)

If desired, single augmentations can be added to the dataset by concatenation.

In [None]:
# Build one augmented variant of the training data per augmentation function.
ds_rotated = rotate_images(train_ds)
ds_gaussian = apply_gaussian_filter(train_ds)
ds_hsv = random_hsv(train_ds)
ds_noise = add_noise(train_ds, 0.2)

# Currently disabled: uncomment to append the augmented variants to train_ds.
#train_ds = train_ds.concatenate(ds_rotated).concatenate(ds_gaussian).concatenate(ds_hsv).concatenate(ds_noise)


In [None]:
BATCH_SIZE = 64
# take() and skip() expect an element count, so turn the rounded value into a
# plain int instead of passing a float.
TRAIN_SPLIT = int(np.round(train_ds.cardinality().numpy() * 0.85))


def _fresh_cache(name):
    """Delete cache files left in the working directory by an earlier run.

    tf.data reads from an existing cache file instead of the upstream dataset,
    so leftovers would silently replace the split computed in this session.
    Returns `name` unchanged so the call can be used as the cache filename.
    """
    for entry in os.listdir('.'):
        if entry.startswith(name) and os.path.isfile(entry):
            os.remove(entry)
    return name


train_ds, val_ds = train_ds.take(TRAIN_SPLIT), train_ds.skip(TRAIN_SPLIT)
num_val_images = val_ds.cardinality().numpy()

# Cache before shuffling: a cache placed after shuffle() would replay the very
# same order in every epoch.
train_ds = train_ds.cache(filename=_fresh_cache('cached_train_ds'))
train_ds = train_ds.shuffle(2048).batch(BATCH_SIZE, drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE)

# Validation and test data are only evaluated, so they need no shuffling, and
# the final partial batch is kept so that every held-out image is scored.
val_ds = val_ds.cache(filename=_fresh_cache('cached_val_ds'))
val_ds = val_ds.batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)

test_ds = test_ds.cache(filename=_fresh_cache('cached_test_ds'))
test_ds = test_ds.batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)

print(
    f'Training dataset contains {train_ds.cardinality().numpy() * BATCH_SIZE} images after data augmentation.',
    f'Validation dataset contains {num_val_images} images.',
    sep='\n')

# Training

In [None]:
# Reset Keras' global state before any model is built.
tf.keras.backend.clear_session()
# Number of epochs each model below is trained for.
NR_EPOCHS = 10

## EfficientNetV2

Now we bring in a pre-trained EfficientNetV2-B0, a fairly new architecture that has been built to be extremely efficient and to transfer well.

In [None]:
hub_url = "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b0/feature_vector/2"

# Pre-trained feature extractor (loaded with trainable=True, so its weights
# are updated during training) followed by a softmax classification layer
# with one unit per class.
efficientNet = tf.keras.Sequential()
efficientNet.add(tf.keras.layers.InputLayer(input_shape=(224,224,3)))
efficientNet.add(hub.KerasLayer(hub_url, trainable=True))
efficientNet.add(tf.keras.layers.Dense(NUM_CLASSES, activation='softmax'))

efficientNet.summary()

In [None]:
# Configure training: Adam with its default settings and categorical
# cross-entropy on the one-hot encoded labels.
# NOTE(review): Precision(top_k=5) treats all five top-ranked classes as
# predictions; with a single true class per image that presumably caps the
# value at about 0.2 — confirm this is the intended metric (vs. top-5
# accuracy).
efficientNet.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[tf.keras.metrics.CategoricalAccuracy(), tf.keras.metrics.Precision(top_k=5)]
)

# Each execution logs to its own TensorBoard directory, named by the current
# timestamp.
log_dir = "logs/efficientNetV2" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
# Open TensorBoard on the shared log directory to follow the training run.
%tensorboard --logdir=logs

In [None]:
# Train for NR_EPOCHS epochs, validating on val_ds and logging to TensorBoard.
efficientNet.fit(
    train_ds,
    epochs=NR_EPOCHS,
    validation_data=val_ds,
    callbacks=[tensorboard_callback],
)

In [None]:
# Score the trained model on the held-out test data.
efficientNet.evaluate(test_ds)

## InceptionV3

InceptionV3 CNN as per Szegedy et al. (2015).

In [None]:
hub_url = "https://tfhub.dev/google/inaturalist/inception_v3/feature_vector/5"

# Same layout as the previous model: a pre-trained feature extractor (loaded
# with trainable=True) followed by a softmax layer with one unit per class.
inception = tf.keras.Sequential()
inception.add(tf.keras.layers.InputLayer(input_shape=(224,224,3)))
inception.add(hub.KerasLayer(hub_url, trainable=True))
inception.add(tf.keras.layers.Dense(NUM_CLASSES, activation='softmax'))

inception.summary()

In [None]:
# Open TensorBoard on the shared log directory to follow the training run.
%tensorboard --logdir=logs

In [None]:
# Configure training: Adam with its default settings and categorical
# cross-entropy on the one-hot encoded labels.
# NOTE(review): Precision(top_k=5) treats all five top-ranked classes as
# predictions; with a single true class per image that presumably caps the
# value at about 0.2 — confirm this is the intended metric (vs. top-5
# accuracy).
inception.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[tf.keras.metrics.CategoricalAccuracy(), tf.keras.metrics.Precision(top_k=5)]
)

# Each execution logs to its own TensorBoard directory, named by the current
# timestamp.
log_dir = "logs/inceptionV3" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
# Train for NR_EPOCHS epochs, validating on val_ds and logging to TensorBoard.
inception.fit(
    train_ds,
    epochs=NR_EPOCHS,
    validation_data=val_ds,
    callbacks=[tensorboard_callback],
)

In [None]:
# Score the trained model on the held-out test data.
inception.evaluate(test_ds)