In [None]:
# Download the flowers-recognition dataset from Kaggle into datasets/flowers-recognition and unzip it.
# NOTE(review): presumably needs the Kaggle CLI installed and API credentials configured — confirm.
!kaggle datasets download -p datasets/flowers-recognition -d alxmamaev/flowers-recognition --unzip

In [None]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
# Class names in alphabetical order, used to turn a predicted/one-hot index back into a name.
class_names = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']

# Collect (image_path, label) pairs; the name of each sub-directory is the label.
# - sorted() makes the listing order deterministic (glob returns files in arbitrary order).
# - os.path.basename extracts the directory name regardless of the path separator,
#   whereas splitting on '/' yields a wrong label when glob returns '\\'-separated paths.
data = []
for class_dir in sorted(glob.glob('datasets/flowers-recognition/flowers/*')):
    label = os.path.basename(class_dir)
    for img in sorted(glob.glob(f'{class_dir}/*.jpg')):
        data.append((img, label))

print(f'There are {len(data)} images')

In [None]:
# Shuffle the data before partitioning. A fixed seed makes the train/validation/test
# partition identical on every run, and a shuffled copy is built instead of shuffling
# `data` in place so that re-running this cell does not reshuffle the list again.
SPLIT_SEED = 42
rng = np.random.default_rng(SPLIT_SEED)
shuffled = np.array(data)[rng.permutation(len(data))]

# Split the data into train (first 70%), validation (next 10%) and test (last 20%) sets
train_end = int(len(shuffled) * 0.7)
val_end = int(len(shuffled) * 0.8)
train, val, test = np.split(shuffled, [train_end, val_end])

# Column 0 holds the image path, column 1 the label
train_df = pd.DataFrame({'image': train[:, 0], 'label': train[:, 1]})
val_df = pd.DataFrame({'image': val[:, 0], 'label': val[:, 1]})
test_df = pd.DataFrame({'image': test[:, 0], 'label': test[:, 1]})

print(f'There are {len(train_df)} images for training')
print(f'There are {len(val_df)} images for validation')
print(f'There are {len(test_df)} images for testing')

In [None]:
# Give the CNN 32 images at a time.
BATCH_SIZE = 32
IMG_HEIGHT = 224
IMG_WIDTH = 224

# Create the ImageDataGenerator object. No rescaling is configured, so the batches keep
# pixel values in the 0-255 range (the preview cell divides by 255 only for display).
# NOTE(review): Keras EfficientNetV2 models include their own input preprocessing by
# default and expect 0-255 inputs — confirm against the installed version.
image_generator = tf.keras.preprocessing.image.ImageDataGenerator()


def _make_flow(dataframe, shuffle):
    """Build a batched, resized image iterator over one split.

    Args:
        dataframe: DataFrame with an 'image' column (file paths) and a 'label' column.
        shuffle: Whether the iterator should shuffle the samples.

    Returns:
        The iterator returned by ``flow_from_dataframe``; it yields
        (images, one-hot labels) batches of up to BATCH_SIZE samples.
    """
    return image_generator.flow_from_dataframe(
        dataframe=dataframe,
        x_col='image',
        y_col='label',
        # Pin the label-index order to class_names so every split, and every later
        # class_names[argmax] lookup, uses the same mapping.
        classes=class_names,
        class_mode='categorical',
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        seed=42,
    )


# Only the training split needs shuffling; validation and test keep a stable order
# so their samples line up with the rows of val_df / test_df.
train_dataset = _make_flow(train_df, shuffle=True)
val_dataset = _make_flow(val_df, shuffle=False)
test_dataset = _make_flow(test_df, shuffle=False)

# Pull one batch to inspect its shape (and to preview it in the next cell)
train_images, train_labels = next(iter(train_dataset))

print(f'Batch shape: {train_images.shape}')
print(f'Label shape: {train_labels.shape}')

In [None]:
# Sanity-check the input pipeline: show the first 25 images of the training batch
# in a 5x5 grid, each captioned with the name of its class
fig, axes = plt.subplots(5, 5, figsize=(10, 10))
for idx, ax in enumerate(axes.flat):
    ax.set_xticks([])
    ax.set_yticks([])
    # Pixel values are divided by 255 so that imshow receives floats in [0, 1]
    ax.imshow(train_images[idx] / 255)
    ax.set_xlabel(class_names[np.argmax(train_labels[idx])])
plt.show()

In [None]:
# Transfer learning with a pre-trained model from Keras Applications.
# The network is loaded without its classification top (include_top=False), with
# ImageNet weights and global average pooling applied to its output.
base_model = tf.keras.applications.efficientnet_v2.EfficientNetV2S(
    include_top=False,
    weights='imagenet',
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
    pooling='avg',
)

# Freezing the weights in the base model since it is pre-trained
base_model.trainable = False

model = tf.keras.models.Sequential([
    base_model,
    tf.keras.layers.Dense(256, activation='relu'),
    # One softmax output per class; sized from class_names instead of a hard-coded 5
    # so the head stays consistent with the label list.
    tf.keras.layers.Dense(len(class_names), activation='softmax'),
])

model.summary()

In [None]:
# Configure training: Adam (learning rate 1e-4) as the optimizer, categorical
# cross-entropy as the loss function and accuracy as the performance metric
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Early-stopping callback: watches the validation loss with a patience of 3 epochs
# and is configured to restore the best weights
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

In [None]:
# Train the model. The EarlyStopping callback created in the previous cell only takes
# effect when it is passed to fit(), so it is wired in here. batch_size is not passed:
# the generator already yields batches of BATCH_SIZE, and the Keras docs say not to
# specify batch_size for generator inputs.
history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=val_dataset,
    callbacks=[callback],
)

In [None]:
def plot_metric(history, metric='loss'):
    """Plot the training and validation curves of one metric.

    Args:
        history: Object whose ``history`` attribute maps metric names to sequences
            of values; the ``metric`` and ``val_<metric>`` entries are plotted.
        metric: Name of the metric to plot (defaults to 'loss').
    """
    heading = metric.capitalize()
    # Training curve first, validation curve second (matches the legend order)
    for key in (metric, f'val_{metric}'):
        plt.plot(history.history[key])
    plt.title(heading)
    plt.xlabel('Epoch')
    plt.ylabel(heading)
    plt.legend(['Training', 'Validation'])
    plt.show()

# Evaluate the network: draw the loss and accuracy curves from the training history
for metric_name in ('loss', 'accuracy'):
    plot_metric(history, metric_name)

# Test the network on unseen data (the held-out test split)
loss, acc = model.evaluate(test_dataset)

In [None]:
def process_image(url):
    """Download an image and turn it into a single-image batch for the model.

    The file is read with the Keras image loader rather than ``plt.imread`` so that
    inference inputs are prepared like the training batches: the image is converted
    to RGB and resized by the loader, and pixel values stay in the 0-255 range
    whatever the file format (``plt.imread`` returns 0-1 floats for PNG files and
    may return an alpha channel).

    Args:
        url: Address of the image to download.

    Returns:
        A NumPy array of shape (1, IMG_HEIGHT, IMG_WIDTH, 3).
    """
    path = tf.keras.utils.get_file(origin=url)
    pil_image = tf.keras.preprocessing.image.load_img(
        path, target_size=(IMG_HEIGHT, IMG_WIDTH)
    )
    pixels = tf.keras.preprocessing.image.img_to_array(pil_image)
    # Add the leading batch dimension expected by model.predict
    return np.expand_dims(pixels, 0)

# Classify a sample photo downloaded from Wikimedia Commons and print the raw scores
image = process_image('https://upload.wikimedia.org/wikipedia/commons/c/c4/Tulipa_orphanidea_060506.jpg')
predictions = model.predict(image)
print(predictions)

# Show the photo without axis ticks, captioned with the top class and its score
plt.xticks([])
plt.yticks([])
plt.xlabel(f'{class_names[np.argmax(predictions[0])]} ({np.max(predictions):.2f})')
plt.imshow(image[0] / 255)

In [None]:
# Persist the trained model to an .h5 file under saved/
model_path = 'saved/image-classifier-flowers-recognition.h5'
model.save(model_path)