### Necessary Imports and Installs

In [1]:
# !pip install -U tensorflow-addons
# !pip install huggingface-hub
# !pip install transformers
# !pip install datasets

In [2]:
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Add, GlobalAveragePooling2D, Conv2D, Dense, AveragePooling2D, BatchNormalization, Dropout, Flatten, Lambda, Input, Activation
from tensorflow.keras import Model
from tensorflow.keras.optimizers import schedules, SGD
from tensorflow.keras.callbacks import Callback, TensorBoard as TensorboardCallback, EarlyStopping
from tensorflow.keras import backend as K

import tensorflow_addons as tfa
import tensorflow_datasets as tfds

from huggingface_hub import notebook_login, HfFolder, HfApi

from transformers import TFViTForImageClassification, create_optimizer, ViTFeatureExtractor
from transformers.keras_callbacks import PushToHubCallback

import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import scale
import time
from collections import defaultdict
import math
import copy
import opendatasets as od
# import cartopy

%pylab inline --no-import-all
from pathlib import Path
import pandas as pd
import sys

Populating the interactive namespace from numpy and matplotlib


## Load Dataset from file

In [3]:
# Change this path to adapt to where you downloaded the data
DATA_PATH = Path("./geolifeclef-2022-lifeclef-2022-fgvc9/")

In [4]:
import time
hours = 4
#time.sleep(60*60*hours)

In [5]:
### Training Dataset ###
# Read the French and the US training observations (";"-separated CSV files,
# indexed by observation_id) and stack them into a single frame.
df_obs_fr = pd.read_csv(
    DATA_PATH / "observations" / "observations_fr_train.csv",
    sep=";",
    index_col="observation_id",
)
df_obs_us = pd.read_csv(
    DATA_PATH / "observations" / "observations_us_train.csv",
    sep=";",
    index_col="observation_id",
)

df_obs = pd.concat([df_obs_fr, df_obs_us])

print("Number of observations for training: {}".format(len(df_obs)))

# Preview the first rows of the combined table.
df_obs.head()

Number of observations for training: 1627475


Unnamed: 0_level_0,latitude,longitude,species_id,subset
observation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10561949,45.705116,1.424622,241,train
10131188,45.146973,6.416794,101,train
10799362,46.783695,-2.072855,700,train
10392536,48.604866,-2.825003,1456,train
10335049,48.815567,-0.161431,157,train


In [6]:
### Test Dataset ###
# Same layout as the training files: one CSV per region, indexed by
# observation_id, concatenated into one frame.
df_obs_fr_test = pd.read_csv(
    DATA_PATH / "observations" / "observations_fr_test.csv",
    sep=";",
    index_col="observation_id",
)
df_obs_us_test = pd.read_csv(
    DATA_PATH / "observations" / "observations_us_test.csv",
    sep=";",
    index_col="observation_id",
)

df_obs_test = pd.concat([df_obs_fr_test, df_obs_us_test])

print("Number of observations for testing: {}".format(len(df_obs_test)))

# Preview the first rows of the combined table.
df_obs_test.head()

Number of observations for testing: 36421


Unnamed: 0_level_0,latitude,longitude
observation_id,Unnamed: 1_level_1,Unnamed: 2_level_1
10782781,43.601788,6.940195
10364138,46.241711,0.683586
10692017,45.181095,1.533459
10222322,46.93845,5.298678
10241950,45.017433,0.960736


In [7]:
df_suggested_landcover_alignment = pd.read_csv(DATA_PATH / "metadata" / "landcover_suggested_alignment.csv", sep=";")
df_suggested_landcover_alignment.head()

Unnamed: 0,landcover_code,suggested_landcover_code,suggested_landcover_label
0,0,0,Missing Data
1,1,11,Cultivated Crops
2,2,11,Cultivated Crops
3,3,6,Broad-leaved Forest
4,4,7,Coniferous Forest


In [8]:
from GLC.data_loading.common import load_patch

patch = load_patch(10171444, DATA_PATH)

print("Number of data sources: {}".format(len(patch)))
print("Arrays shape: {}".format([p.shape for p in patch]))
print("Data types: {}".format([p.dtype for p in patch]))

Number of data sources: 4
Arrays shape: [(256, 256, 3), (256, 256), (256, 256), (256, 256)]
Data types: [dtype('uint8'), dtype('uint8'), dtype('int16'), dtype('uint8')]


In [9]:
landcover_mapping = df_suggested_landcover_alignment["suggested_landcover_code"].values
#patch = load_patch(10171444, DATA_PATH, landcover_mapping=landcover_mapping)

In [10]:
# from GLC.plotting import visualize_observation_patch

# # Extracts land cover labels
# landcover_labels = df_suggested_landcover_alignment[["suggested_landcover_code", "suggested_landcover_label"]].drop_duplicates().sort_values("suggested_landcover_code")["suggested_landcover_label"].values

# visualize_observation_patch(patch, observation_data=df_obs.loc[10561900], landcover_labels=landcover_labels)

In [11]:
"""patch = load_patch(22068100, DATA_PATH, landcover_mapping=landcover_mapping)

visualize_observation_patch(patch, observation_data=df_obs.loc[22068100], landcover_labels=landcover_labels)
"""

'patch = load_patch(22068100, DATA_PATH, landcover_mapping=landcover_mapping)\n\nvisualize_observation_patch(patch, observation_data=df_obs.loc[22068100], landcover_labels=landcover_labels)\n'

### Train/Val Split Labels
Retrieve the train/val split provided.

In [12]:
# Observation ids of each subset, taken from the "subset" column of df_obs.
obs_id_train = df_obs.index[df_obs["subset"].eq("train")].values
obs_id_val = df_obs.index[df_obs["subset"].eq("val")].values

# Species labels in the same order as the id arrays above.
y_train = df_obs.loc[obs_id_train, "species_id"].values
y_val = df_obs.loc[obs_id_val, "species_id"].values

n_val = len(obs_id_val)
print(
    "Training set size: {} ({:.1%} of train observations)".format(
        len(y_train), len(y_train) / len(df_obs)
    )
)
print(
    "Validation set size: {} ({:.1%} of train observations)".format(
        n_val, n_val / len(df_obs)
    )
)

Training set size: 1587395 (97.5% of train observations)
Validation set size: 40080 (2.5% of train observations)


### Load patches

In [13]:
# load training dataset samples
# factor = 1 means load full training dataset
# factor = 100 means load 1/100 of the full dataset

def load_train_data(factor):
    """Load the first 1/`factor` share of the training patches into one array.

    Walks `obs_id_train` in order, keeps element 0 of whatever `load_patch`
    returns for each observation, and stops as soon as at least
    `len(obs_id_train) / factor` patches have been collected. A text progress
    bar is rewritten on stdout after every patch.

    Parameters
    ----------
    factor : number
        Divisor applied to the size of the training set (1 loads everything,
        100 loads one hundredth).

    Returns
    -------
    numpy.ndarray
        The collected patches stacked with `np.array`.
    """
    images = []
    for obs_id in obs_id_train:
        images.append(load_patch(obs_id, DATA_PATH, landcover_mapping=landcover_mapping)[0])

        loaded = len(images)
        target = len(obs_id_train) / factor
        pct = loaded / target * 100

        # "\r" moves back to the start of the line so the bar is redrawn in place
        sys.stdout.write('\r')
        sys.stdout.write("[%-20s] %d%%" % ('=' * int(pct / 5), pct))
        sys.stdout.flush()

        if loaded >= target:
            break
    print()

    return np.array(images)

def load_val_data(factor):
    """Load the first 1/`factor` share of the validation patches into one array.

    Walks `obs_id_val` in order, keeps element 0 of whatever `load_patch`
    returns for each observation, and stops as soon as at least
    `len(y_val) / factor` patches have been collected. A text progress bar is
    rewritten on stdout after every patch.

    Parameters
    ----------
    factor : number
        Divisor applied to the size of the validation set (1 loads everything,
        100 loads one hundredth).

    Returns
    -------
    numpy.ndarray
        The collected patches stacked with `np.array`. (The previous version
        built this array but never returned it, so callers received `None`.)
    """
    X_val = list()
    for obs_id in obs_id_val:
        patch = load_patch(obs_id, DATA_PATH, landcover_mapping=landcover_mapping)
        X_val.append(patch[0])

        percent_progress = len(X_val)/(len(y_val)/factor) * 100
        # "\r" moves back to the start of the line so the bar is redrawn in place
        sys.stdout.write('\r')
        sys.stdout.write("[%-20s] %d%%" % ('='*int(percent_progress/5), percent_progress))
        sys.stdout.flush()

        if len(X_val) >= (len(y_val)/factor):
            break

    print()

    X_val = np.array(X_val)
    return X_val

In [14]:
factor = 1000
# train_ds = tf.data.Dataset.from_tensor_slices((load_train_data(factor), y_train[:obs_id_train//factor]))
# train_ds = train_ds.batch(64)

In [15]:
# Number of distinct species ids among the training observations.
num_classes = df_obs['species_id'].nunique()
# Height, width and channel count of one RGB patch.
input_shape = (256, 256, 3)

In [16]:
# val_ds = tf.data.Dataset.from_tensor_slices((X_val, y_val[:len(X_val)]))
# val_ds = val_ds.batch(64)

# Transformer - ViT - from scratch

In [17]:
# learning_rate = 0.001
# weight_decay = 0.0001
# batch_size = 256
# num_epochs = 50
# image_size = 72  # We'll resize input images to this size
# patch_size = 6  # Size of the patches to be extract from the input images
# num_patches = (image_size // patch_size) ** 2
# projection_dim = 64
# num_heads = 4
# transformer_units = [
#     projection_dim * 2,
#     projection_dim,
# ]  # Size of the transformer layers
# transformer_layers = 8
# mlp_head_units = [2048, 1024]  # Size of the dense layers of the final classifier

In [18]:
# # Data augmentation
# data_augmentation = keras.Sequential(
#     [
#         layers.Normalization(),
#         layers.Resizing(image_size, image_size),
#         layers.RandomFlip("horizontal"),
#         layers.RandomRotation(factor=0.02),
#         layers.RandomZoom(
#             height_factor=0.2, width_factor=0.2
#         ),
#     ],
#     name="data_augmentation",
# )
# # Compute the mean and the variance of the training data for normalization.
# data_augmentation.layers[0].adapt(X_train)

# # multi-layer perceptron
# def mlp(x, hidden_units, dropout_rate):
#     for units in hidden_units:
#         x = layers.Dense(units, activation=tf.nn.gelu)(x)
#         x = layers.Dropout(dropout_rate)(x)
#     return x

# # Patch creation
# class Patches(layers.Layer):
#     def __init__(self, patch_size):
#         super(Patches, self).__init__()
#         self.patch_size = patch_size

#     def call(self, images):
#         batch_size = tf.shape(images)[0]
#         patches = tf.image.extract_patches(
#             images=images,
#             sizes=[1, self.patch_size, self.patch_size, 1],
#             strides=[1, self.patch_size, self.patch_size, 1],
#             rates=[1, 1, 1, 1],
#             padding="VALID",
#         )
#         patch_dims = patches.shape[-1]
#         patches = tf.reshape(patches, [batch_size, -1, patch_dims])
#         return patches
    
# class PatchEncoder(layers.Layer):
#     def __init__(self, num_patches, projection_dim):
#         super(PatchEncoder, self).__init__()
#         self.num_patches = num_patches
#         self.projection = layers.Dense(units=projection_dim)
#         self.position_embedding = layers.Embedding(
#             input_dim=num_patches, output_dim=projection_dim
#         )

#     def call(self, patch):
#         positions = tf.range(start=0, limit=self.num_patches, delta=1)
#         encoded = self.projection(patch) + self.position_embedding(positions)
#         return encoded

In [19]:
# # Visualize patches
# plt.figure(figsize=(4, 4))
# image = X_train[np.random.choice(range(X_train.shape[0]))]
# plt.imshow(image.astype("uint8"))
# plt.axis("off")

# resized_image = tf.image.resize(
#     tf.convert_to_tensor([image]), size=(image_size, image_size)
# )
# patches = Patches(patch_size)(resized_image)
# print(f"Image size: {image_size} X {image_size}")
# print(f"Patch size: {patch_size} X {patch_size}")
# print(f"Patches per image: {patches.shape[1]}")
# print(f"Elements per patch: {patches.shape[-1]}")

# n = int(np.sqrt(patches.shape[1]))
# plt.figure(figsize=(4, 4))
# for i, patch in enumerate(patches[0]):
#     ax = plt.subplot(n, n, i + 1)
#     patch_img = tf.reshape(patch, (patch_size, patch_size, 3))
#     plt.imshow(patch_img.numpy().astype("uint8"))
#     plt.axis("off")

In [20]:
# def create_vit_classifier():
#     inputs = layers.Input(shape=input_shape)
#     # Augment data.
#     augmented = data_augmentation(inputs)
#     # Create patches.
#     patches = Patches(patch_size)(augmented)
#     # Encode patches.
#     encoded_patches = PatchEncoder(num_patches, projection_dim)(patches)

#     # Create multiple layers of the Transformer block.
#     for _ in range(transformer_layers):
#         # Layer normalization 1.
#         x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
#         # Create a multi-head attention layer.
#         attention_output = layers.MultiHeadAttention(
#             num_heads=num_heads, key_dim=projection_dim, dropout=0.1
#         )(x1, x1)
#         # Skip connection 1.
#         x2 = layers.Add()([attention_output, encoded_patches])
#         # Layer normalization 2.
#         x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
#         # MLP.
#         x3 = mlp(x3, hidden_units=transformer_units, dropout_rate=0.1)
#         # Skip connection 2.
#         encoded_patches = layers.Add()([x3, x2])

#     # Create a [batch_size, projection_dim] tensor.
#     representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
#     representation = layers.Flatten()(representation)
#     representation = layers.Dropout(0.5)(representation)
#     # Add MLP.
#     features = mlp(representation, hidden_units=mlp_head_units, dropout_rate=0.5)
    
#     # Classify outputs.
#     logits = layers.Dense(num_classes)(features)
#     # Create the Keras model.
#     model = keras.Model(inputs=inputs, outputs=logits)
#     return model

In [21]:
# def run_experiment(model):
#     optimizer = tfa.optimizers.AdamW(
#         learning_rate=learning_rate, weight_decay=weight_decay
#     )

#     model.compile(
#         optimizer=optimizer,
#         loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
#         metrics=[
#             keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
#             keras.metrics.SparseTopKCategoricalAccuracy(5, name="top-5-accuracy"),
#         ],
#     )

#     checkpoint_filepath = "/tmp/checkpoint"
#     checkpoint_callback = keras.callbacks.ModelCheckpoint(
#         checkpoint_filepath,
#         monitor="val_accuracy",
#         save_best_only=True,
#         save_weights_only=True,
#     )

#     history = model.fit(
#         X_train, y_train[:len(X_train)],
#         batch_size=batch_size,
#         epochs=num_epochs,
#         validation_data=val_ds,
#         callbacks=[checkpoint_callback],
#     )

#     model.load_weights(checkpoint_filepath)
# #     _, accuracy, top_5_accuracy = model.evaluate(x_test, y_test)
# #     print(f"Test accuracy: {round(accuracy * 100, 2)}%")
# #     print(f"Test top 5 accuracy: {round(top_5_accuracy * 100, 2)}%")

#     return history


# vit_classifier = create_vit_classifier()
# history = run_experiment(vit_classifier)

In [22]:
# vit_classifier.summary()

# Transformer - ViT - pre-trained

In [23]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [24]:
model_id = "google/vit-base-patch16-224-in21k"

In [25]:
from scipy.ndimage import zoom

In [26]:
import os
import datasets

def create_dataset(y, factor=1000):
    """Build a Hugging Face `datasets.Dataset` of resized training patches and labels.

    The patches come from `load_train_data(factor)`; they are rescaled by
    224/256 along height and width and moved to channels-first layout to match
    the `(3, 224, 224)` feature declared below.

    Parameters
    ----------
    y : array-like of int
        Species ids, in the same order as the observations read by
        `load_train_data`. Only the first `len(X)` entries are used.
    factor : number, default 1000
        Passed to `load_train_data`; 1/`factor` of the training set is loaded.

    Returns
    -------
    datasets.Dataset
        Columns `pixel_values` (Array3D, int16, shape (3, 224, 224)) and
        `label` (ClassLabel whose names are the distinct ids in `y` as strings).
    """
    # Sort the distinct ids so the ClassLabel index of a species does not
    # depend on set iteration order.
    labels = list(np.char.mod('%d', sorted(set(y))))
    features = datasets.Features({
        "pixel_values": datasets.Array3D((3, 224, 224), dtype='int16'),
        "label": datasets.features.ClassLabel(names=labels),
    })

    # create dataset
    X = load_train_data(factor)

    # Rescale height and width only; batch and channel axes keep their size.
    processed_X = zoom(X, (1, 224/256, 224/256, 1))
    # (N, H, W, C) -> (N, C, H, W). swapaxes(1, -1) would give (N, C, W, H),
    # i.e. every image transposed.
    processed_X = np.moveaxis(processed_X, -1, 1)

    print(processed_X.shape)

    y = np.char.mod('%d', y)
    ds = datasets.Dataset.from_dict({"pixel_values": processed_X, "label": y[:len(X)]}, features=features)
    return ds

In [27]:
train_ds = create_dataset(y_train, factor=5000)

(318, 3, 224, 224)


In [28]:
print(train_ds)

Dataset({
    features: ['pixel_values', 'label'],
    num_rows: 318
})


In [29]:
img_class_labels = train_ds.features["label"].names

In [33]:
# Side length (in pixels) used by the Resizing layer below.
image_size = 224

# Preprocessing configuration published with the pre-trained checkpoint.
feature_extractor = ViTFeatureExtractor.from_pretrained(model_id)

# learn more about data augmentation here: https://www.tensorflow.org/tutorials/images/data_augmentation
data_augmentation = keras.Sequential(name="data_augmentation")
data_augmentation.add(layers.Resizing(image_size, image_size))
data_augmentation.add(layers.Rescaling(1./255))
data_augmentation.add(layers.RandomFlip("horizontal"))
data_augmentation.add(layers.RandomRotation(factor=0.02))
data_augmentation.add(layers.RandomZoom(height_factor=0.2, width_factor=0.2))
# use keras image data augmentation processing
def augmentation(examples):
    """Run every image of a batch through the `data_augmentation` pipeline.

    `examples["pixel_values"]` is replaced by the list of augmented images and
    the same `examples` object is returned.
    """
    augmented = []
    for image in examples["pixel_values"]:
        augmented.append(data_augmentation(image))
    examples["pixel_values"] = augmented
    return examples

# basic processing (feature extractor only, no augmentation)
def process(examples):
    """Apply `feature_extractor` to a batch and merge its output into the batch.

    The result of `feature_extractor(examples['pixel_values'])` is written
    into `examples` with `update`, and the same `examples` object is returned.
    """
    extracted = feature_extractor(examples['pixel_values'])
    examples.update(extracted)
    return examples
 
# we are also renaming our label col to labels to use `.to_tf_dataset` later
# Guarded so that re-running this cell does not fail once the column has
# already been renamed.
if "label" in train_ds.column_names:
    train_ds = train_ds.rename_column("label", "labels")

2022-04-25 20:33:30.116789: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-25 20:33:30.117644: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-25 20:33:30.254212: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-25 20:33:30.254948: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-25 20:33:30.255575: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from S

In [34]:
# augmenting dataset takes a lot of time
# processed_dataset = train_ds.map(process, batched=True)
# processed_dataset

# processed_dataset = eurosat_ds.map(augmentation, batched=True)

In [35]:
# test size will be 15% of train dataset
test_size=.15

# Fixed seeds make the shuffle and the split identical on every run, so the
# reported metrics stay comparable after a kernel restart.
processed_dataset = train_ds.shuffle(seed=42).train_test_split(test_size=test_size, seed=42)

**Hyperparameters**

In [40]:
num_train_epochs = 50
train_batch_size = 32
eval_batch_size = 32
learning_rate = 3e-5
weight_decay_rate=0.01
num_warmup_steps=0
# Local directory for logs and saved files: the part of the model id after "/".
output_dir=model_id.split("/")[1]
# Never commit an access token to the notebook. Take it from the HF_TOKEN
# environment variable, falling back to the token stored by notebook_login().
# NOTE(review): the token that used to be hard-coded here is public in the
# file history and should be revoked.
hub_token = os.environ.get("HF_TOKEN") or HfFolder.get_token()
hub_model_id = f'{model_id.split("/")[1]}-species-prediction'
fp16=True

# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
if fp16:
    keras.mixed_precision.set_global_policy("mixed_float16")

In [41]:
processed_dataset['train'].features['pixel_values']

Array3D(shape=(3, 224, 224), dtype='int16', id=None)

In [42]:
from transformers import DefaultDataCollator

# Data collator that gathers the examples of a batch into TensorFlow tensors.
data_collator = DefaultDataCollator(return_tensors="tf")

# converting our train dataset to tf.data.Dataset
tf_train_dataset = processed_dataset["train"].to_tf_dataset(
   columns=['pixel_values'],
   label_cols=["labels"],
   shuffle=True,
   batch_size=train_batch_size,
   collate_fn=data_collator)

# converting our test dataset to tf.data.Dataset
# Evaluation does not benefit from shuffling; keeping the order fixed makes
# the evaluation batches reproducible.
tf_eval_dataset = processed_dataset["test"].to_tf_dataset(
   columns=['pixel_values'],
   label_cols=["labels"],
   shuffle=False,
   batch_size=eval_batch_size,
   collate_fn=data_collator)

In [45]:
# create optimizer with weight decay
# One optimizer step is taken per batch, so the schedule length is
# (batches per epoch) * (epochs), counted on the batched training split rather
# than on the number of samples in the unsplit dataset.
num_train_steps = len(tf_train_dataset) * num_train_epochs
optimizer, lr_schedule = create_optimizer(
    init_lr=learning_rate,
    num_train_steps=num_train_steps,
    weight_decay_rate=weight_decay_rate,
    num_warmup_steps=num_warmup_steps,
)

# load pre-trained ViT model
base_model = TFViTForImageClassification.from_pretrained(model_id)

# Inputs
pixel_values = layers.Input(shape=(3, 224, 224), name='pixel_values', dtype='float32')

# Pre-trained ViT model
# Only the inner `vit` backbone of the loaded model is used; element 0 of its
# output is kept.
vit = base_model.vit(pixel_values)[0]

# Add classification head
# The head reads position 0 along axis 1 of the backbone output
# (presumably the [CLS] token — TODO confirm).
classifier = tf.keras.layers.Dense(num_classes, name='outputs')(vit[:, 0, :])

model = tf.keras.Model(inputs=pixel_values, outputs=classifier)

# Optimizer
# NOTE(review): this rebinds `optimizer`, so the optimizer and schedule
# returned by create_optimizer above are not the ones compiled into the model
# — confirm which of the two is intended.
optimizer = tfa.optimizers.AdamW(learning_rate=learning_rate, 
                                 weight_decay=weight_decay_rate)
# Compile model
model.compile(optimizer=optimizer,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[
                  tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
                  tf.keras.metrics.SparseTopKCategoricalAccuracy(10, name="top-10-accuracy")
              ]
              )

Some layers from the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing TFViTForImageClassification: ['vit/pooler/dense/bias:0', 'vit/pooler/dense/kernel:0']
- This IS expected if you are initializing TFViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [46]:
# Training callbacks: TensorBoard event files are written to <output_dir>/logs.
callbacks = [
    TensorboardCallback(log_dir=os.path.join(output_dir, "logs")),
]
# callbacks.append(EarlyStopping(monitor="val_accuracy",patience=1))
# if hub_token:
#     callbacks.append(PushToHubCallback(output_dir=output_dir,
#                                        hub_model_id=hub_model_id,
#                                        hub_token=hub_token))

In [47]:
train_results = model.fit(
    tf_train_dataset,
    validation_data=tf_eval_dataset,
    callbacks=callbacks,
    epochs=num_train_epochs,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50


Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [48]:
# Upload the feature extractor configuration to the Hugging Face Hub.
api = HfApi()

# Account information for the token; its 'name' entry is used in the repo id below.
user = api.whoami(hub_token)

# Save the feature extractor into output_dir; preprocessor_config.json is
# then read from that directory for the upload.
feature_extractor.save_pretrained(output_dir)

# Last expression of the cell, so the value returned by upload_file is displayed.
api.upload_file(
    token=hub_token,
    repo_id=f"{user['name']}/{hub_model_id}",
    path_or_fileobj=os.path.join(output_dir,"preprocessor_config.json"),
    path_in_repo="preprocessor_config.json",
)

'https://huggingface.co/jannikjw/vit-base-patch16-224-in21k-species-prediction/blob/main/preprocessor_config.json'

# First Simple Neural Network
Let's create a first neural network as a baseline to see how it performs.

In [None]:
# returns a small baseline CNN: 3 ReLU conv layers followed by 2 ReLU dense layers
def simple_model(input_shape, n_classes=4911):
    """Build and compile a small convolutional baseline classifier.

    Architecture: pixel rescaling by 1/255, three 5x5 'same'-padded ReLU
    convolutions (32, 64 and 128 filters), flatten, two 64-unit ReLU dense
    layers and a softmax output layer.

    Parameters
    ----------
    input_shape : tuple of int
        Shape of one input image, e.g. (256, 256, 3).
    n_classes : int, default 4911
        Width of the softmax output layer. The default keeps the value that
        was previously hard-coded.

    Returns
    -------
    tf.keras.Model
        Sequential model compiled with sparse categorical cross-entropy,
        Adam (learning rate 1e-4) and the 'accuracy' metric.
    """
    model = tf.keras.models.Sequential()

    # 1. Preprocessing
    # rescale inputs; the input shape is declared on the FIRST layer, because
    # Sequential ignores `input_shape` on any later layer
    model.add(tf.keras.layers.Rescaling(1./255, input_shape=input_shape))

    # 2. Convolutional Layers
    model.add(Conv2D(32, kernel_size=5, activation='relu', padding='same'))
    #model.add(AveragePooling2D())

    model.add(Conv2D(64, kernel_size=5, activation='relu', padding='same'))
    #model.add(AveragePooling2D())

    model.add(Conv2D(128, kernel_size=5, activation='relu', padding='same'))

    # from convolutional layers to dense layers
    model.add(tf.keras.layers.Flatten())

    # 3. Dense Layers
    model.add(Dense(64, activation='relu'))
    model.add(Dense(64, activation='relu'))

    # 4. Output Layer
    model.add(Dense(n_classes, activation='softmax'))

    # compile the model
    model.compile(loss=tf.keras.losses.sparse_categorical_crossentropy,
                  optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                  metrics=['accuracy'])

    return model

In [None]:
# create the network
model = simple_model((256, 256, 3))

In [None]:
# Largest species id among the first len(X_train) training labels.
# NOTE(review): `X_train` is not assigned at notebook level in the visible cells
# (it is only a local variable of `load_train_data`) — confirm it exists before running.
np.max(y_train[:len(X_train)])

In [None]:
# Smallest species id among the first len(X_train) training labels.
# NOTE(review): `X_train` is not assigned at notebook level in the visible cells
# (it is only a local variable of `load_train_data`) — confirm it exists before running.
np.min(y_train[:len(X_train)])

Train the network.

In [None]:
# Stop once the training 'accuracy' has not improved by at least 0.001 for
# 5 consecutive epochs, then restore the best weights seen so far.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='accuracy',
    min_delta=0.001,
    patience=5,
    verbose=0,
    mode='auto',
    baseline=None,
    restore_best_weights=True,
)


In [None]:
# Fit the baseline model for up to 100 epochs with early stopping.
# NOTE(review): in the visible cells `val_ds` is only assigned in commented-out
# code, and `train_ds` is the Hugging Face dataset returned by `create_dataset`
# (label column renamed to "labels"), not a batched tf.data.Dataset — confirm
# both inputs before running this cell.
history = model.fit(train_ds, validation_data=val_ds, #X_train, y_train[:len(X_train)], #validation_data=(X_val, y_val), 
                    epochs=100, 
                    callbacks=[early_stop])

In [None]:
model.save('first_simple_model')