
This notebook trains a ResNet50 convolutional neural network for image classification. It is trained on a custom dataset containing Colorado plants created using the iNaturalist API. The dataset contains:

- 434 classes (plant species)
- 180710 features (images)
- Between 400 and 600 features per class

Final performance metrics on validation dataset for model after initial training and two stages of gradual fine tuning:

- val_accuracy: 0.9223 (92.2%)
- val_loss: 0.3364


In [2]:
import tensorflow as tf
import tensorflow.keras

In [6]:
# Constants
TFRECORD_FILENAME = "drive/MyDrive/resnet50_dataset.tfrecord"
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 64
NUM_CLASSES = 434
EPOCHS_INITIAL = 15
EPOCHS_FINE = 10
TOTAL_EXAMPLES = 180710

In [4]:
# Function to provide additional pre-processing, specific to ResNet50's input
def parse_example(example_proto):
    feature_description = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'label': tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_single_example(example_proto, feature_description)
    image = tf.io.decode_jpeg(parsed['image'], channels=3)
    image = tf.image.resize(image, IMAGE_SIZE)

    image = tf.keras.applications.resnet.preprocess_input(tf.cast(image, tf.float32))
    label = parsed['label']
    return image, label

In [5]:
# Train/validation split of dataset with shuffling
def get_finite_dataset():
    raw_dataset = tf.data.TFRecordDataset(TFRECORD_FILENAME)
    dataset = raw_dataset.map(parse_example, num_parallel_calls=tf.data.AUTOTUNE)
    return dataset

dataset = get_finite_dataset().shuffle(buffer_size=50000)
train_size = int(0.8 * TOTAL_EXAMPLES)

train_ds = dataset.take(train_size).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
val_ds = dataset.skip(train_size).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [7]:
# Set up base moidel
base_model = tf.keras.applications.ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3)
)

# Freeze base so it doesn't get overwritten
base_model.trainable = False

inputs = tf.keras.Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))
x = base_model(inputs, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')(x)
model = tf.keras.Model(inputs, outputs)

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

In [8]:
# Initial training
history = model.fit(
    train_ds,
    epochs=EPOCHS_INITIAL,
    validation_data=val_ds,
)

Epoch 1/15
   2259/Unknown [1m112s[0m 35ms/step - accuracy: 0.3210 - loss: 3.1827



[1m2259/2259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 72ms/step - accuracy: 0.3210 - loss: 3.1826 - val_accuracy: 0.4937 - val_loss: 2.1994
Epoch 2/15
[1m2259/2259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m163s[0m 65ms/step - accuracy: 0.5149 - loss: 2.0533 - val_accuracy: 0.5599 - val_loss: 1.8465
Epoch 3/15
[1m2259/2259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m163s[0m 65ms/step - accuracy: 0.5665 - loss: 1.7849 - val_accuracy: 0.5943 - val_loss: 1.6752
Epoch 4/15
[1m2259/2259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m163s[0m 65ms/step - accuracy: 0.5962 - loss: 1.6497 - val_accuracy: 0.6233 - val_loss: 1.5320
Epoch 5/15
[1m2259/2259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m163s[0m 65ms/step - accuracy: 0.6185 - loss: 1.5398 - val_accuracy: 0.6443 - val_loss: 1.4411
Epoch 6/15
[1m2259/2259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m162s[0m 65ms/step - accuracy: 0.6332 -

In [9]:
# In case fine tuning goes badly, we can save/load the model after initial training so we don't have to re-train completely
model.save("initial_resnet50.keras")

In [None]:
model = tf.keras.models.load_model("initial_resnet50.keras")

In [10]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

In [11]:
# Gradual unfreezing using ReduceLROnPlateau scheduler in case of plateau
# Starting with last 20 layers
for layer in base_model.layers[:-20]:
    layer.trainable = False
for layer in base_model.layers[-20:]:
    if isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = False
    else:
        layer.trainable = True

lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2)

In [13]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

history_fine_stage1 = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=5,
    callbacks=[lr_scheduler]
)

Epoch 1/5
[1m2259/2259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 75ms/step - accuracy: 0.6625 - loss: 1.3115 - val_accuracy: 0.8069 - val_loss: 0.7871 - learning_rate: 1.0000e-05
Epoch 2/5
[1m2259/2259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m172s[0m 69ms/step - accuracy: 0.7384 - loss: 0.9913 - val_accuracy: 0.8299 - val_loss: 0.6930 - learning_rate: 1.0000e-05
Epoch 3/5
[1m2259/2259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 69ms/step - accuracy: 0.7710 - loss: 0.8538 - val_accuracy: 0.8439 - val_loss: 0.6476 - learning_rate: 1.0000e-05
Epoch 4/5
[1m2259/2259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 69ms/step - accuracy: 0.7934 - loss: 0.7601 - val_accuracy: 0.8572 - val_loss: 0.5819 - learning_rate: 1.0000e-05
Epoch 5/5
[1m2259/2259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m172s[0m 69ms/step - accuracy: 0.8157 - loss: 0.6750 - val_accuracy: 0.8689 - val_loss: 0.5438 - learning_rate: 1.0000e-05


In [14]:
model.save("stage1_resnet50.keras")

In [15]:
# Fine tuning for last 40 layers
for layer in base_model.layers[:-40]:
    layer.trainable = False
for layer in base_model.layers[-40:]:
    if isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = False
    else:
        layer.trainable = True


In [16]:
history_fine_stage2 = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=5,
    callbacks=[lr_scheduler]
)

Epoch 1/5
   2259/Unknown [1m99s[0m 36ms/step - accuracy: 0.8339 - loss: 0.6041



[1m2259/2259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 69ms/step - accuracy: 0.8339 - loss: 0.6041 - val_accuracy: 0.8798 - val_loss: 0.5007 - learning_rate: 1.0000e-05
Epoch 2/5
[1m2259/2259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 69ms/step - accuracy: 0.8532 - loss: 0.5352 - val_accuracy: 0.8933 - val_loss: 0.4488 - learning_rate: 1.0000e-05
Epoch 3/5
[1m2259/2259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 69ms/step - accuracy: 0.8654 - loss: 0.4833 - val_accuracy: 0.9019 - val_loss: 0.4084 - learning_rate: 1.0000e-05
Epoch 4/5
[1m2259/2259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 69ms/step - accuracy: 0.8800 - loss: 0.4311 - val_accuracy: 0.9107 - val_loss: 0.3792 - learning_rate: 1.0000e-05
Epoch 5/5
[1m2259/2259[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 69ms/step - accuracy: 0.8925 - loss: 0.3861 - val_accuracy: 0.9223 - val_loss: 0.3364 - le

In [17]:
model.save("stage2_resnet50.keras")