## Flowers Classification

Using dataset from kaggle containing 900 different kinds of flowers we are going to create model to correctly predict those classes.

In [1]:
import os
import zipfile
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

%matplotlib inline

FLOWERS_PATH = os.path.join(os.getcwd(), 'data/flowers_dataset/')

#### Load and explore data

In [11]:
zip_path = os.path.join(os.getcwd(), 'data/archive.zip')
zip_file = zipfile.ZipFile(zip_path,'r')

zip_file.extractall(FLOWERS_PATH)
zip_file.close()
os.remove(zip_path)

In [2]:
flowers = pd.read_csv(os.path.join(FLOWERS_PATH, 'Flower_Table.csv'))

In [3]:
flowers.head()

Unnamed: 0,Flower group,Flower name,url,Containers,Flower Time,Flowers,Leaves,Life cycle,Miscellaneous,Plant Habit,...,Awards and Recognitions,Parentage,Child plants,Plant Height,Fruit,Minimum cold hardiness,Propagation: Other methods,Propagation: Seeds,Toxicity,Bloom Size
0,aeoniums,Aeonium 'Mardi Gras',https://garden.org/plants/view/676815/Aeonium-...,Needs excellent drainage in pots,"Late winter or early spring, Spring",Showy,"Unusual foliage color, Variegated",Perennial,Patent/Plant Breeders' Rights: USPP21407,Cactus/Succulent,...,,,,,,,,,,
1,aeoniums,Aeonium (Aeonium nobile),https://garden.org/plants/view/117036/Aeonium-...,"Suitable in 3 gallon or larger, Needs excellen...","Spring, Summer",Showy,Evergreen,Perennial,"Tolerates poor soil, Monocarpic",Cactus/Succulent,...,,,,,,,,,,
2,aeoniums,Variegated Pinwheel (Aeonium haworthii 'Varieg...,https://garden.org/plants/view/805995/Variegat...,,,Showy,,Perennial,,Cactus/Succulent,...,RHS AGM,,,,,,,,,
3,aeoniums,Aeonium 'Jolly Clusters',https://garden.org/plants/view/806612/Aeonium-...,,,Showy,,Perennial,,Cactus/Succulent,...,,Aeonium tabuliforme x Aeonium,,,,,,,,
4,aeoniums,Aeonium 'Emerald Ice',https://garden.org/plants/view/740710/Aeonium-...,,,Showy,"Evergreen, Variegated",Perennial,Patent/Plant Breeders' Rights: USPP27613,Cactus/Succulent,...,,Sport of: Party Platter,,,,,,,,


In [4]:
flowers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 900 entries, 0 to 899
Data columns (total 29 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   Flower group                900 non-null    object
 1   Flower name                 900 non-null    object
 2   url                         900 non-null    object
 3   Containers                  436 non-null    object
 4   Flower Time                 554 non-null    object
 5   Flowers                     643 non-null    object
 6   Leaves                      493 non-null    object
 7   Life cycle                  722 non-null    object
 8   Miscellaneous               233 non-null    object
 9   Plant Habit                 839 non-null    object
 10  Resistances                 393 non-null    object
 11  Soil pH Preferences         226 non-null    object
 12  Suitable Locations          183 non-null    object
 13  Sun Requirements            784 non-null    object

In [5]:
photos_path = os.path.join(FLOWERS_PATH, 'Flowers_Photo/')

def data_counter(path):
    """
    Counts images in each directory

    Args:
      -path (string): directory path containing images
    
    Returns:
      Dictionary containing directory names as keys and number of
      files in directory as values.
    """
    directory = os.listdir(path)
    values = {directory[i]: 0 for i in range(len(directory))}
    for folder in directory:
        folder_path = os.path.join(path, folder)
        for img in os.listdir(folder_path):
            img_path = os.path.join(folder_path, img)
            if os.path.getsize(img_path) == 0:
                os.remove(img_path)
            else:
                values[folder] += 1

    return values

num_flowers = sum(data_counter(photos_path).values())
print(f"Number of examples in dataset is: {num_flowers}")

Number of examples in dataset is: 21433


In [6]:
from PIL import Image
from random import sample

def print_size(dir):
    """
    Print size of random image from each directory

    Args:
      -dir (string): directory path containing images
    
    Returns:
      None
    """
    for directory in os.listdir(dir):
        directory_path = os.path.join(dir, directory)
        image_p = sample(os.listdir(directory_path), 1)[0]
        image = Image.open(os.path.join(directory_path, image_p))
        print(f"Random image size from {directory} directory is: {image.size, image.mode}")


print_size(photos_path)

Random image size from Winter Honeysuckle (Lonicera fragrantissima) directory is: ((250, 250), 'RGB')
Random image size from Carrot (Daucus carota subsp. sativus 'Ultimate Hybrid') directory is: ((250, 250), 'RGB')
Random image size from Butterwort (Pinguicula moctezumae) directory is: ((250, 250), 'RGB')
Random image size from Sweet Potato (Ipomoea batatas Sweet Caroline Sweetheart Jet Black╬ô├ñ├│) directory is: ((250, 250), 'RGB')
Random image size from Smoketrees (Cotinus coggygria) directory is: ((250, 250), 'RGB')
Random image size from Cantaloupes (Cucumis melo) directory is: ((250, 250), 'RGB')
Random image size from Garlic (Allium sativum 'Georgian Crystal') directory is: ((250, 250), 'RGB')
Random image size from Astilbe 'Fanal' directory is: ((250, 250), 'RGB')
Random image size from Leeks (Allium ampeloprasum) directory is: ((250, 250), 'RGB')
Random image size from Western Red Cedar (Thuja plicata 'Whipcord') directory is: ((250, 250), 'RGB')
Random image size from Pumpkin 

In [7]:
from shutil import copyfile

def create_train_validation(data_path, training_path, validation_path, split_s):
    """
    Split the data into train and validation datasets

    Args:
      -data_path (string): directory path containing images
      -training_path (string): directory path to be used for training
      -validation_path (string): directory path to be used for validation
      -split_s (float): portion of the dataset to be used for training

    Returns:
      None
    """

    dirs = os.listdir(data_path)
    print("Data directories:")
    for img in dirs:
        print(f'-{img}')
    
    try:
        os.makedirs(training_path)
    except FileExistsError:
        print('\nTraining directory exists in this path')

    try:
        os.makedirs(validation_path)
    except FileExistsError:
        print('Validation directory exists in this path')

    direcotries_sizes = data_counter(data_path)
    
    for dir in dirs:
        train_name = os.path.join(training_path, dir)
        validation_name = os.path.join(validation_path, dir)
        os.makedirs(train_name)
        os.makedirs(validation_name)
        split = int(direcotries_sizes[dir] * split_s)
        
        curr_dir = os.path.join(data_path, dir)
        dir_shuffled = sample(os.listdir(curr_dir), len(os.listdir(curr_dir)))

        for img in dir_shuffled[:split]:
            copyfile(os.path.join(curr_dir, img), os.path.join(train_name, img))
        
        for img in dir_shuffled[split:]:
            copyfile(os.path.join(curr_dir, img), os.path.join(validation_name, img))
    
    # print(f'\nTraining sizes: {data_counter(training_path)}')
    # print(f'\nValidation sizes: {data_counter(validation_path)}')

    return training_path, validation_path

training_path, validation_path = create_train_validation(photos_path, os.path.join(FLOWERS_PATH, 'training/'), os.path.join(FLOWERS_PATH, 'validation/'), .7)

Data directories:
-Winter Honeysuckle (Lonicera fragrantissima)
-Carrot (Daucus carota subsp. sativus 'Ultimate Hybrid')
-Butterwort (Pinguicula moctezumae)
-Sweet Potato (Ipomoea batatas Sweet Caroline Sweetheart Jet Black╬ô├ñ├│)
-Smoketrees (Cotinus coggygria)
-Cantaloupes (Cucumis melo)
-Garlic (Allium sativum 'Georgian Crystal')
-Astilbe 'Fanal'
-Leeks (Allium ampeloprasum)
-Western Red Cedar (Thuja plicata 'Whipcord')
-Pumpkin (Cucurbita pepo 'Winter Luxury Pie')
-Echeveria (Echeveria runyonii)
-Radish (Raphanus sativus 'Early Scarlet Globe')
-Purple Basil (Ocimum basilicum 'Purple Delight')
-Dahlia 'AC Sadie'
-Onion (Allium cepa 'Red River F1')
-Strawberries (Fragaria)
-American Cranberrybush Viburnum (Viburnum opulus var. americanum)
-Morning Glory (Ipomoea 'Split Second')
-Morning Glory (Ipomoea nil 'Kikyo Snowflakes')
-Apricot (Prunus armeniaca 'Gold Kist')
-Aeonium 'Mardi Gras'
-Dwarf Caladium (Caladium humboldtii)
-Chinese Wisteria (Wisteria sinensis 'Prolific')
-Peach (Prun

In [8]:
from keras.preprocessing.image import ImageDataGenerator

def create_data(train_path, validation_path):
    """
    Create training validation data split

    Args:
      -training_path (string): directory path to be used for training
      -validation_path (string): directory path to be used for validation

    Returns:
      
    """
    training_datagen = ImageDataGenerator(rescale=1/255,
                                        rotation_range=90,
                                        width_shift_range=0.3,
                                        height_shift_range=0.3,
                                        fill_mode='nearest',
                                        horizontal_flip=0.2,
                                        vertical_flip=0.2,
                                        zoom_range=0.3)
    training_data = training_datagen.flow_from_directory(directory=train_path,
                                                        class_mode='categorical',
                                                        target_size=(250, 250),
                                                        batch_size=64,
                                                        shuffle=True)
    
    validation_datagen = ImageDataGenerator(rescale=1/255)

    validation_data = validation_datagen.flow_from_directory(directory=validation_path,
                                                            class_mode='categorical',
                                                            target_size=(250, 250),
                                                            batch_size=64)
    return training_data, validation_data

training_data, validation_data = create_data(training_path, validation_path)


Found 14588 images belonging to 900 classes.
Found 6845 images belonging to 900 classes.


## Model using TensorFlow functional API

We are going to create model from scratch using tf functional API

In [23]:
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Dense, Dropout, Flatten
from keras import Input, Model

inputs = Input(shape=(250, 250 ,3))
x = Conv2D(16, (3, 3), activation='relu')(inputs)
x = Conv2D(16, (3, 3), activation='relu')(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2,2))(x)
x = Conv2D(32, (3, 3), activation='relu')(x)
x = Conv2D(32, (3, 3), activation='relu')(x)
x = BatchNormalization()(x)
x = MaxPooling2D(2,2)(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2,2))(x)
x = Conv2D(128, (3, 3), activation='relu')(x)
x = Conv2D(128, (3, 3), activation='relu')(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2,2))(x)
x = Conv2D(256, (3, 3), activation='relu')(x)
x = Conv2D(256, (3, 3), activation='relu')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu')(x)
x = Conv2D(512, (3, 3), activation='relu')(x)
x = BatchNormalization()(x)
x = Flatten()(x)
x = Dense(2048, activation='relu')(x)
x = Dropout(0.6)(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.4)(x)
outputs = Dense(900, activation='softmax')(x)

model = Model(inputs, outputs, name='FlowersClassification')
model.summary()

Model: "FlowersClassification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_12 (InputLayer)       [(None, 250, 250, 3)]     0         
                                                                 
 conv2d_114 (Conv2D)         (None, 248, 248, 16)      448       
                                                                 
 conv2d_115 (Conv2D)         (None, 246, 246, 16)      2320      
                                                                 
 batch_normalization_60 (Bat  (None, 246, 246, 16)     64        
 chNormalization)                                                
                                                                 
 max_pooling2d_43 (MaxPoolin  (None, 123, 123, 16)     0         
 g2D)                                                            
                                                                 
 conv2d_116 (Conv2D)         (None, 121, 121,

In [24]:
model.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'])

history = model.fit(training_data,
                    epochs=20,
                    validation_data=validation_data)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20

KeyboardInterrupt: 

### Transfer Learning

In [38]:
from keras.applications import MobileNetV2

transfer = MobileNetV2(input_shape=(250, 250, 3),
                            include_top=False,
                            weights='imagenet')

transfer.summary()

for layer in transfer.layers:
    layer.trainable = False


last_layer = transfer.get_layer('Conv_1_bn')
print('last layer output shape: ', last_layer.output_shape)


Model: "mobilenetv2_1.00_224"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_20 (InputLayer)          [(None, 250, 250, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv1 (Conv2D)                 (None, 125, 125, 32  864         ['input_20[0][0]']               
                                )                                                                 
                                                                                                  
 bn_Conv1 (BatchNormalization)  (None, 125, 125, 32  128         ['Conv1[0][0]']                  
                                )                                              

In [39]:
z = Flatten()(last_layer.output)
z = Dense(1024, activation='relu')(z)
z = Dropout(0.3)(z)
transfer_outputs = Dense(900, activation='softmax')(z)

transfer_model = Model(transfer.input, transfer_outputs)
transfer_model.summary()

transfer_model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

Model: "model_7"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_20 (InputLayer)          [(None, 250, 250, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv1 (Conv2D)                 (None, 125, 125, 32  864         ['input_20[0][0]']               
                                )                                                                 
                                                                                                  
 bn_Conv1 (BatchNormalization)  (None, 125, 125, 32  128         ['Conv1[0][0]']                  
                                )                                                           

In [40]:
history_transfer = transfer_model.fit(training_data,
                                      epochs=20,
                                      validation_data=validation_data)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20

KeyboardInterrupt: 

As we can se for now every architecture is failing to learn.
We are going to try for MobileNet architecture to train from scratch on our dataset.
Models are doing bad, because we have 900 and only 22 000 examples, even with data augumentation we are doing pretty poorly.

#### ResNet50 Architecture

In [43]:
from keras.applications import ResNet50V2

#Load ResNet50 model without weights to train from scratch
resnet = ResNet50V2(include_top=False,
                    input_shape=(250,  250, 3),
                    weights=None)


#Get specified layer from ResNet model
last_resnet_layer = resnet.get_layer('post_bn')
print('last layer output shape: ', last_layer.output_shape)


y = Flatten()(last_resnet_layer.output)
y = Dense(1024, activation='relu')(y)
resnet_output = Dense(900, activation='softmax')(y)

resnet_model = Model(resnet.input, resnet_output)

resnet_model.summary()

last layer output shape:  (None, 8, 8, 1280)
Model: "model_9"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_23 (InputLayer)          [(None, 250, 250, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 256, 256, 3)  0           ['input_23[0][0]']               
                                                                                                  
 conv1_conv (Conv2D)            (None, 125, 125, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                               

In [44]:
resnet_model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

In [45]:
history_resnet = transfer_model.fit(training_data,
                                      epochs=20,
                                      validation_data=validation_data)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
 28/228 [==>...........................] - ETA: 1:43 - loss: 6.1210 - accuracy: 0.0185

KeyboardInterrupt: 