### Necessary Imports and Installs

In [1]:
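# Uncomment to install the third-party dependencies used below.
# Prefer `%pip` over `!pip` so the packages are installed into the running kernel's environment.
# %pip install -U tensorflow-addons
# %pip install huggingface-hub
# %pip install transformers
# %pip install datasets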

In [2]:
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Add, GlobalAveragePooling2D, Conv2D, Dense, AveragePooling2D, BatchNormalization, Dropout, Flatten, Lambda, Input, Activation
from tensorflow.keras import Model
from tensorflow.keras.optimizers import schedules, SGD
from tensorflow.keras.callbacks import Callback, TensorBoard as TensorboardCallback, EarlyStopping
from tensorflow.keras import backend as K

import tensorflow_addons as tfa
import tensorflow_datasets as tfds

from huggingface_hub import notebook_login, HfFolder, HfApi

from transformers import TFViTForImageClassification, create_optimizer, ViTFeatureExtractor
from transformers.keras_callbacks import PushToHubCallback

import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import scale
import time
from collections import defaultdict
import math
import copy
import threading
import opendatasets as od
# import cartopy

from GLC.data_loading.common import load_patch


%pylab inline --no-import-all
from pathlib import Path
import pandas as pd
import sys

Populating the interactive namespace from numpy and matplotlib


# Load Data

## Load Dataset from file

In [3]:
# Root folder of the downloaded dataset, relative to the notebook's working
# directory. Edit the folder name below if the data lives somewhere else.
DATA_PATH = Path(".") / "geolifeclef-2022-lifeclef-2022-fgvc9"

In [4]:
# NOTE(review): `time` is already imported in the imports cell at the top of the
# notebook, so this re-import is redundant (but harmless).
import time
# NOTE(review): `hours` is not referenced anywhere else in the visible notebook;
# presumably a wall-clock budget (in hours) for a run — confirm or remove.
hours = 4

In [5]:
# All observation tables share the same folder and CSV dialect
# (semicolon-separated, indexed by observation id).
_observations_dir = DATA_PATH / "observations"
_csv_options = {"sep": ";", "index_col": "observation_id"}

### Training Dataset ###
# France and US observations are read separately and stacked into one frame.
df_obs_fr = pd.read_csv(_observations_dir / "observations_fr_train.csv", **_csv_options)
df_obs_us = pd.read_csv(_observations_dir / "observations_us_train.csv", **_csv_options)
df_obs = pd.concat([df_obs_fr, df_obs_us])

print(f"Number of observations for training: {len(df_obs)}")

### Test Dataset ###
df_obs_fr_test = pd.read_csv(_observations_dir / "observations_fr_test.csv", **_csv_options)
df_obs_us_test = pd.read_csv(_observations_dir / "observations_us_test.csv", **_csv_options)
df_obs_test = pd.concat([df_obs_fr_test, df_obs_us_test])

print(f"Number of observations for testing: {len(df_obs_test)}")

# Suggested land-cover code alignment shipped with the dataset metadata
df_suggested_landcover_alignment = pd.read_csv(
    DATA_PATH / "metadata" / "landcover_suggested_alignment.csv",
    sep=";",
)

# Load the patch of a single observation as a sanity check and report
# what each returned array looks like.
patch = load_patch(10171444, DATA_PATH)

print(f"Number of data sources: {len(patch)}")
print(f"Arrays shape: {[p.shape for p in patch]}")
print(f"Data types: {[p.dtype for p in patch]}")

landcover_mapping = df_suggested_landcover_alignment["suggested_landcover_code"].values

Number of observations for training: 1627475
Number of observations for testing: 36421
Number of data sources: 4
Arrays shape: [(256, 256, 3), (256, 256), (256, 256), (256, 256)]
Data types: [dtype('uint8'), dtype('uint8'), dtype('int16'), dtype('uint8')]


## Train/Val Split Labels
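Retrieve the train/validation split provided with the dataset: the `subset` column of the observations table marks each observation as `train` or `val`.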

In [6]:
# The `subset` column assigns every observation to the "train" or "val" split.
_subset = df_obs["subset"]
obs_id_train = df_obs.index[_subset == "train"].values
obs_id_val = df_obs.index[_subset == "val"].values

# Labels are the species ids of the selected observations, in the same order.
y_train = df_obs.loc[obs_id_train, "species_id"].values
y_val = df_obs.loc[obs_id_val, "species_id"].values

n_val = len(obs_id_val)
_n_total = len(df_obs)
print(f"Training set size: {len(y_train)} ({len(y_train) / _n_total:.1%} of train observations)")
print(f"Validation set size: {n_val} ({n_val / _n_total:.1%} of train observations)")

Training set size: 1587395 (97.5% of train observations)
Validation set size: 40080 (2.5% of train observations)


## Load patches

In [103]:
class Patches_Generator(tf.keras.utils.Sequence) :
    """Keras ``Sequence`` that loads image patches from disk one batch at a time.

    Each item is a tuple ``(X, y)`` where ``X`` stacks the first array returned
    by ``load_patch(obs_id, DATA_PATH, data='rgb')`` for every observation in
    the batch and ``y`` holds the matching labels. The last batch is shorter
    when the number of observations is not a multiple of ``batch_size``.

    Args:
        obs_ids: indexable collection of observation ids.
        labels: indexable collection of labels, aligned with ``obs_ids``.
        batch_size: maximum number of observations per batch.
    """

    def __init__(self, obs_ids, labels, batch_size) :
        super().__init__()
        self.obs_ids = obs_ids
        self.labels = labels
        self.batch_size = batch_size

        # Kept so existing code that accesses `.lock` keeps working. It is no
        # longer acquired in `__getitem__`: a batch is built only from local
        # variables and read-only attributes, so the lock guarded nothing.
        self.lock = threading.Lock()

    def __len__(self) :
        """Return the number of batches per epoch as a plain Python ``int``."""
        return math.ceil(len(self.obs_ids) / self.batch_size)

    def __getitem__(self, idx) :
        """Return batch number ``idx`` as ``(X_batch, y_batch)`` numpy arrays."""
        start = idx * self.batch_size
        # Clamp the end so the final, possibly incomplete, batch stops at the data end.
        stop = min(start + self.batch_size, len(self.obs_ids))

        # `load_patch(..., data='rgb')` returns a sequence of arrays; only the
        # first one is used. No axis swap happens here: the patch is kept in
        # the layout `load_patch` returns.
        X_batch = [
            load_patch(self.obs_ids[i], DATA_PATH, data='rgb')[0]
            for i in range(start, stop)
        ]
        y_batch = [self.labels[i] for i in range(start, stop)]

        return np.asarray(X_batch), np.array(y_batch)

# Build Pre-trained Transformer

In [8]:
# Log in to the Hugging Face Hub through the interactive notebook widget.
# NOTE(review): presumably this stores the access token locally so that later
# Hub calls can reuse it — confirm against the huggingface_hub documentation.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [9]:
# for distributed training (that is, using multiple GPUs for data parallelization)
# https://www.tensorflow.org/guide/distributed_training#use_tfdistributestrategy_with_keras_modelfit
# mirrored_strategy = tf.distribute.MirroredStrategy()

**Hyperparameters**

In [104]:
# Pre-trained transformer checkpoint to fine-tune
model_id = "google/vit-base-patch16-224-in21k"

# Shape of the patches fed to the model: square, channels-last RGB images
input_size = 256
input_channels = 3
input_shape = (input_size, input_size, input_channels)

# One output unit per distinct species seen in the training labels.
# NOTE(review): the raw species ids are used directly as sparse labels, which
# assumes they are contiguous integers in [0, num_classes) — confirm.
num_classes = len(set(y_train))

# Optimisation settings
num_train_epochs = 50
train_batch_size = 32
eval_batch_size = 32
learning_rate = 3e-5
weight_decay_rate=0.01
num_warmup_steps=0

# Local output folder and Hub repository name, both derived from the checkpoint name
output_dir=model_id.split("/")[1]
hub_model_id = f'{model_id.split("/")[1]}-species-prediction'

# SECURITY: never hard-code an access token in a notebook. The token is read
# from the HF_TOKEN environment variable, falling back to the token stored by
# a previous `notebook_login()`; it is None when neither is available.
# The token that used to be committed on this line must be treated as leaked
# and revoked in the Hugging Face account settings.
hub_token = os.environ.get("HF_TOKEN") or HfFolder.get_token()

fp16=True

# Train in mixed-precision float16
# Set fp16 to False if you're using a GPU that will not benefit from this
if fp16:
    keras.mixed_precision.set_global_policy("mixed_float16")

In [105]:
train_data = Patches_Generator(obs_id_train, y_train, train_batch_size)
# The validation generator uses the evaluation batch size
val_data = Patches_Generator(obs_id_val, y_val, eval_batch_size)


def _patches_dataset(generator):
    """Wrap a ``Patches_Generator`` in a ``tf.data.Dataset`` of (images, labels) batches.

    The batch dimension is declared as ``None`` because the last batch of an
    epoch is smaller whenever the number of observations is not a multiple of
    the batch size; a fixed batch dimension makes that final batch fail the
    shape check.
    """
    return tf.data.Dataset.from_generator(
        lambda: generator,  # the Sequence is iterated batch by batch
        output_types=(tf.float32, tf.float32),  # dtype of (images, labels)
        output_shapes=(
            [None, input_size, input_size, input_channels],  # images
            [None],  # labels
        ),
    )


# converting our train and validation generators to tf.data.Dataset
tf_train_dataset = _patches_dataset(train_data)
tf_val_dataset = _patches_dataset(val_data)

In [106]:
# from transformers import DefaultDataCollator

# # Data collator that will dynamically pad the inputs received, as well as the labels.
# data_collator = DefaultDataCollator(return_tensors="tf")

# tf_train_dataset = processed_dataset["train"].to_tf_dataset(
#    columns=['pixel_values'],
#    label_cols=["labels"],
#    shuffle=True,
#    batch_size=train_batch_size,
#    collate_fn=data_collator)

# # converting our test dataset to tf.data.Dataset
# tf_eval_dataset = processed_dataset["test"].to_tf_dataset(
#    columns=['pixel_values'],
#    label_cols=["labels"],
#    shuffle=True,
#    batch_size=eval_batch_size,
#    collate_fn=data_collator)

In [107]:
# Layer that exchanges two axes of a tensor, e.g. to move the channel axis for
# a transformer that expects channels as the first non-batch dimension.

class SwapAxes(tf.keras.layers.Layer):
    """Keras layer that swaps two axes of its input.

    Note that swapping is not a general permutation: every other axis keeps
    its position, so e.g. swapping axes ``-1`` and ``1`` of a
    ``(batch, height, width, channels)`` tensor yields
    ``(batch, channels, width, height)``.

    Args:
        axis1: first axis to swap (batch axis included in the numbering).
        axis2: second axis to swap.
        **kwargs: standard ``tf.keras.layers.Layer`` arguments (``name``,
            ``dtype``, ...), also needed to rebuild the layer from its config.
    """

    def __init__(self, axis1, axis2, **kwargs):
        super().__init__(**kwargs)
        self.axis1 = axis1
        self.axis2 = axis2

    def get_config(self):
        """Return the layer config, including both axes, so it can be re-created."""
        cfg = super().get_config()
        cfg.update({"axis1": self.axis1, "axis2": self.axis2})
        return cfg

    def build(self, input_shape):
        # The layer has no weights; just let the base class mark it as built.
        super().build(input_shape)

    def call(self, inputs):
        """Return ``inputs`` with ``axis1`` and ``axis2`` exchanged."""
        return tf.experimental.numpy.swapaxes(inputs, self.axis1, self.axis2)

# NOTE(review): this instance is not used anywhere else in the visible notebook.
layer = SwapAxes(-1, 0)

In [108]:
# Used to set image_size based on chosen transformer
feature_extractor = ViTFeatureExtractor.from_pretrained(model_id)

# `size` is a plain int in older transformers releases and a
# {"height": ..., "width": ...} dict in newer ones; support both.
_extractor_size = feature_extractor.size
if isinstance(_extractor_size, dict):
    _target_height = _extractor_size["height"]
    _target_width = _extractor_size["width"]
else:
    _target_height = _target_width = _extractor_size

# Data Augmentation
# The Keras image layers below operate on channels-last tensors
# (batch, height, width, channels), so the conversion to the channels-first
# layout expected by the transformer must be the LAST step. Permute((3, 1, 2))
# maps (batch, H, W, C) to (batch, C, H, W) without transposing height and
# width, which swapping axes -1 and 1 would do.
# NOTE(review): only a 1/255 rescaling is applied here; the checkpoint's
# feature extractor may also define mean/std normalisation — confirm that
# skipping it is intended.
data_augmentation = keras.Sequential(
    [
        layers.Resizing(_target_height, _target_width),
        layers.Rescaling(1./255),
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(factor=0.02),
        layers.RandomZoom(
            height_factor=0.2, width_factor=0.2
        ),
        layers.Permute((3, 1, 2)),
    ],
    name="data_augmentation",
)

In [109]:
# Sanity check: push a symbolic input through the augmentation pipeline and
# compare the shapes before and after.
pixel_values = layers.Input(
    name='pixel_values',
    shape=(256, 256, 3),
    dtype='float32',
)

# Augment data
augmented = data_augmentation(pixel_values)

# One shape per line: input first, augmented output second
print(pixel_values.shape, augmented.shape, sep="\n")

(None, 256, 256, 3)
(None, 3, 224, 224)


In [110]:
from transformers import TFViTModel

def vit(model_id, input_shape, learning_rate, num_classes, weight_decay=None):
    """Build and compile a classifier on top of a pre-trained ViT backbone.

    The model takes channels-last images, runs them through the notebook-level
    ``data_augmentation`` pipeline, feeds the result to the pre-trained ViT and
    puts a dense classification head on the first token of the output sequence.

    Args:
        model_id: Hugging Face checkpoint name passed to
            ``TFViTModel.from_pretrained``.
        input_shape: shape of one input image, without the batch dimension.
        learning_rate: learning rate of the AdamW optimizer.
        num_classes: number of output logits.
        weight_decay: weight decay of the AdamW optimizer. Defaults to the
            notebook-level ``weight_decay_rate`` when omitted.

    Returns:
        A compiled ``tf.keras.Model`` that outputs logits (no softmax) and
        tracks accuracy and top-10 accuracy.
    """
    if weight_decay is None:
        # Same value the function always used: the global hyperparameter
        weight_decay = weight_decay_rate

    # load pre-trained ViT model
    base_model = TFViTModel.from_pretrained(model_id)

    # Inputs
    pixel_values = layers.Input(shape=input_shape, name='pixel_values', dtype='float32')

    # Augment data
    augmented = data_augmentation(pixel_values)

    # Pre-trained ViT model: the first output is the sequence of token
    # representations (named so it no longer shadows this function)
    hidden_states = base_model.vit(augmented)[0]

    # Add classification head on the first token of the sequence.
    # The logits are forced to float32 so the loss stays numerically stable
    # when the global policy is mixed_float16.
    classifier = Dense(num_classes, name='outputs', dtype='float32')(hidden_states[:, 0, :])

    # Define inputs and outputs
    model = tf.keras.Model(inputs=pixel_values, outputs=classifier)

    # Optimizer
    optimizer = tfa.optimizers.AdamW(learning_rate=learning_rate,
                                     weight_decay=weight_decay)
    # Compile model
    model.compile(optimizer=optimizer,
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=[
                      tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
                      tf.keras.metrics.SparseTopKCategoricalAccuracy(10, name="top-10-accuracy")
                  ]
                  )

    return model

In [111]:
# Create model, using the input shape declared with the hyperparameters
# instead of repeating the literal.
# NOTE(review): the learning rate passed here (0.00005) differs from the
# `learning_rate` hyperparameter (3e-5), which is otherwise unused in the
# visible notebook — confirm which value is intended.
model = vit(model_id, input_shape, 0.00005, num_classes)

All model checkpoint layers were used when initializing TFViTModel.

All the layers of TFViTModel were initialized from the model checkpoint at google/vit-base-patch16-224-in21k.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTModel for predictions without further training.


In [112]:
# TensorBoard logs are written to "<output_dir>/logs"
_tensorboard_log_dir = os.path.join(output_dir, "logs")
callbacks = [TensorboardCallback(log_dir=_tensorboard_log_dir)]

# Optional callbacks, currently disabled:
# callbacks.append(EarlyStopping(monitor="val_accuracy",patience=1))
# if hub_token:
#     callbacks.append(PushToHubCallback(output_dir=output_dir,
#                                        hub_model_id=hub_model_id,
#                                        hub_token=hub_token))

In [113]:
# Fine-tune the model; the returned History object is kept in `train_results`.
train_results = model.fit(
    x=tf_train_dataset,
    epochs=num_train_epochs,
    callbacks=callbacks,
    validation_data=tf_val_dataset,
)

Epoch 1/50
      6/Unknown - 29s 3s/step - loss: 9.7396 - accuracy: 0.0000e+00 - top-10-accuracy: 0.0104 

KeyboardInterrupt: 

In [None]:
# Save the feature extractor configuration locally, then publish it to the
# model repository on the Hugging Face Hub.
_preprocessor_filename = "preprocessor_config.json"

api = HfApi()

# Look up the account that owns `hub_token`; its name prefixes the repo id
user = api.whoami(hub_token)
_repo_id = f"{user['name']}/{hub_model_id}"

feature_extractor.save_pretrained(output_dir)

api.upload_file(
    path_or_fileobj=os.path.join(output_dir, _preprocessor_filename),
    path_in_repo=_preprocessor_filename,
    repo_id=_repo_id,
    token=hub_token,
)