## Dandelion Image Classification using Transfer Learning

Given images of grass, let's try to predict whether there are **dandelions** in a given image.

We will use a tensorflow/keras pretrained convolutional neural network to make our predictions.

Data source: https://www.kaggle.com/datasets/coloradokb/dandelionimages

### Getting Started

In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
import os.path

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import tensorflow as tf

from sklearn.metrics import accuracy_score, f1_score

2024-09-15 12:23:44.871696: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
image_dir = Path("Images")

### Creating File DataFrame

In [14]:
filepaths = list(image_dir.glob(r'**/*.jpg'))
filepaths

[PosixPath('Images/other/IMG_5663.jpg'),
 PosixPath('Images/other/IMG_5468.jpg'),
 PosixPath('Images/other/IMG_6262.jpg'),
 PosixPath('Images/other/IMG_5915.jpg'),
 PosixPath('Images/other/IMG_6520.jpg'),
 PosixPath('Images/other/IMG_6158.jpg'),
 PosixPath('Images/other/IMG_5963.jpg'),
 PosixPath('Images/other/IMG_5622.jpg'),
 PosixPath('Images/other/IMG_5824.jpg'),
 PosixPath('Images/other/IMG_6649.jpg'),
 PosixPath('Images/other/IMG_6813.jpg'),
 PosixPath('Images/other/IMG_6251.jpg'),
 PosixPath('Images/other/IMG_5988.jpg'),
 PosixPath('Images/other/IMG_5974.jpg'),
 PosixPath('Images/other/IMG_5880.jpg'),
 PosixPath('Images/other/IMG_5980.jpg'),
 PosixPath('Images/other/IMG_6789.jpg'),
 PosixPath('Images/other/IMG_5955.jpg'),
 PosixPath('Images/other/IMG_5905.jpg'),
 PosixPath('Images/other/IMG_5790.jpg'),
 PosixPath('Images/other/IMG_6811.jpg'),
 PosixPath('Images/other/IMG_6218.jpg'),
 PosixPath('Images/other/IMG_6826.jpg'),
 PosixPath('Images/other/IMG_6804.jpg'),
 PosixPath('Imag

In [13]:
labels = list(map(lambda x: os.path.split(os.path.split(x)[0])[1], filepaths))
labels

['other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',
 'other',


In [22]:
filepaths = pd.Series(filepaths, name='Filepath').astype(str)
labels = pd.Series(labels, name='Label')

image_df = pd.concat([filepaths, labels], axis=1)
image_df

Unnamed: 0,Filepath,Label
0,Images/other/IMG_5663.jpg,other
1,Images/other/IMG_5468.jpg,other
2,Images/other/IMG_6262.jpg,other
3,Images/other/IMG_5915.jpg,other
4,Images/other/IMG_6520.jpg,other
...,...,...
1257,Images/dandelion/IMG_1289.jpg,dandelion
1258,Images/dandelion/IMG_1307.jpg,dandelion
1259,Images/dandelion/IMG_1173.jpg,dandelion
1260,Images/dandelion/IMG_1270.jpg,dandelion


In [23]:
train_df, test_df = train_test_split(image_df, train_size=0.7, shuffle=True, random_state=1)

In [24]:
train_df

Unnamed: 0,Filepath,Label
491,Images/other/IMG_5507.jpg,other
1091,Images/dandelion/IMG_5397.jpg,dandelion
512,Images/other/IMG_5643.jpg,other
919,Images/dandelion/IMG_1284.jpg,dandelion
741,Images/dandelion/IMG_3900.jpg,dandelion
...,...,...
715,Images/dandelion/IMG_3934.jpg,dandelion
905,Images/dandelion/IMG_1133.jpg,dandelion
1096,Images/dandelion/IMG_3931.jpg,dandelion
235,Images/other/IMG_6246.jpg,other


In [25]:
test_df

Unnamed: 0,Filepath,Label
1180,Images/dandelion/IMG_1252.jpg,dandelion
255,Images/other/IMG_5994.jpg,other
304,Images/other/IMG_6256.jpg,other
403,Images/other/IMG_5648.jpg,other
75,Images/other/IMG_5801.jpg,other
...,...,...
37,Images/other/IMG_5837.jpg,other
320,Images/other/IMG_5661.jpg,other
823,Images/dandelion/IMG_5702.jpg,dandelion
1017,Images/dandelion/IMG_1312.jpg,dandelion


### Creating Generators

In [26]:
train_generator = ImageDataGenerator(
    preprocessing_function = tf.keras.applications.mobilenet_v2.preprocess_input,
    validation_split=0.2
)

test_generator = ImageDataGenerator(
    preprocessing_function = tf.keras.applications.mobilenet_v2.preprocess_input
)

In [27]:
train_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode = 'rgb',
    class_mode = 'binary',
    batch_size=32,
    shuffle=True,
    seed=42,
    subset='training'
)

val_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode = 'rgb',
    class_mode = 'binary',
    batch_size=32,
    shuffle=True,
    seed=42,
    subset='validation'
)

test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode = 'rgb',
    class_mode = 'binary',
    batch_size=32,
    shuffle=False
)

Found 707 validated image filenames belonging to 2 classes.
Found 176 validated image filenames belonging to 2 classes.
Found 379 validated image filenames belonging to 2 classes.


### Downloading the Feature Extractor

In [28]:
feature_extractor = tf.keras.applications.MobileNetV2(
    input_shape = (224, 224, 3),
    weights = 'imagenet',
    include_top=False,
    pooling='avg'
)

feature_extractor.trainable = False

2024-09-15 13:31:02.910462: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


### Training

In [30]:
inputs = feature_extractor.input
x = tf.keras.layers.Dense(128, activation='relu')(feature_extractor.output)
x = tf.keras.layers.Dense(128, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)
print(model.summary())

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv1 (Conv2D)                 (None, 112, 112, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 bn_Conv1 (BatchNormalization)  (None, 112, 112, 32  128         ['Conv1[0][0]']                  
                                )                                                             

In [32]:
history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=100,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=3,
            restore_best_weights=True
        )
    ]
)

Epoch 1/100


2024-09-15 13:38:51.776434: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




2024-09-15 13:42:10.121510: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100


### Results

In [39]:
predictions = np.squeeze(model.predict(test_images))
predictions = (predictions >= 0.5).astype(np.int32)

acc = accuracy_score(test_images.labels, predictions)
f1 = f1_score(test_images.labels, predictions)

print("Accuracy: {:.2f}%".format(acc * 100))
print("F1-Score: {:.5f}".format(f1))

2024-09-15 15:11:35.355559: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Accuracy: 84.96%
F1-Score: 0.84718
