In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

#import numpy as np # linear algebra
#import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install -q efficientnet

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os 
import random
import math
from matplotlib import pyplot as plt
import cv2
from tqdm.notebook import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import tensorflow as tf
import tensorflow.keras.backend as K
#import efficientnet.tfkeras as efn
from tensorflow.keras.applications.efficientnet_v2 import EfficientNetV2L
import efficientnet.tfkeras as efn
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.models import Sequential
import tensorflow.keras.layers as L
from tensorflow.keras import layers
import tensorflow.keras as keras
from tensorflow.keras.layers import Input, Conv2D 
from tensorflow.keras.layers import MaxPool2D, Flatten, Dense 
from tensorflow.keras import Model


from keras.utils.np_utils import to_categorical

In [None]:
# Set Random Seed
def set_seed(seed: int = 42) -> None:
    """Seed the Python, NumPy and TensorFlow random generators with ``seed``.

    Also sets the TF_CUDNN_DETERMINISTIC, TF_DETERMINISTIC_OPS and
    PYTHONHASHSEED environment variables and prints the seed that was used.

    Args:
        seed: Integer seed handed to every generator (default 42).
    """
    # Same generators, seeded in the same order as before.
    seeders = (
        random.seed,
        np.random.seed,
        tf.random.set_seed,
        tf.experimental.numpy.random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    os.environ.update({
        'TF_CUDNN_DETERMINISTIC': '1',
        'TF_DETERMINISTIC_OPS': '1',
        'PYTHONHASHSEED': str(seed),
    })
    print(f"Random seed set as {seed}")


# Seed everything once, with the default seed, before any data is touched.
set_seed()

In [None]:
# Configuration
# Initial side length (pixels) of the square images produced by the resize steps below.
IMAGE_SIZE = 224
# Number of samples per batch in the tf.data pipelines.
BATCH_SIZE = 16 

# 1.Preprocessing

In [None]:
# Collect one row per file in the dataset folder. The text before the first
# underscore in each file name is used as the flower label.
raw_dir = '/kaggle/input/flower-image-dataset/flowers'
paths = []
labels = []
# os.listdir returns entries in arbitrary order; iterating in sorted order keeps
# the row order (and with it the seeded splits and the indices shown in the
# plots further down) identical from run to run.
for file_name in sorted(os.listdir(raw_dir)):
    paths.append(os.path.join(raw_dir, file_name))
    labels.append(file_name.split('_')[0])

raw_input = pd.DataFrame({'path': paths,
                          'label': labels})
raw_input.head()

In [None]:
# Count how many images each flower label has (one row per label)
df_count = (
    raw_input['label']
    .value_counts()
    .rename_axis('label')
    .reset_index(name='counts')
)
df_count

In [None]:
# Map every text label to an integer class id; LE is kept so that predictions
# can be turned back into label names later.
LE = LabelEncoder()
LE.fit(raw_input['label'])
raw_input['category'] = LE.transform(raw_input['label'])

In [None]:
# Check Encoding: show one row per distinct (label, category) pair
raw_input[['label','category']].drop_duplicates()

In [None]:
# Preview the first rows of raw_input
raw_input.head()

# 2.Prepare Images

In [None]:
# Prepare images: read every file with OpenCV and resize it to
# IMAGE_SIZE x IMAGE_SIZE. all_images stays index-aligned with `paths`,
# so an unreadable file must stop the run instead of being skipped.
all_images = []
for image_path in tqdm(paths):
    image = cv2.imread(image_path)
    # cv2.imread reports a missing or undecodable file by returning None rather
    # than raising; fail here with the offending path instead of letting
    # cv2.resize raise an opaque assertion error.
    if image is None:
        raise ValueError(f"Could not read image file: {image_path}")
    image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_AREA)
    all_images.append(image)

In [None]:
## Plotting up to 9 random images for each flower category
for label in df_count.label.values:
    print(f"Start Plotting Random Images for \033[1m{label}\033[0m Flowers")
    # random.sample needs a sequence: sampling from a set is deprecated since
    # Python 3.9 and raises TypeError from Python 3.11 on, so pass a list.
    candidate_index = raw_input.index[raw_input['label'] == label].tolist()
    # Never ask for more images than the category holds (random.sample would raise).
    n_images = min(9, len(candidate_index))
    image_index = sorted(random.sample(candidate_index, n_images))
    plt.figure(figsize=(10,10))
    for i in range(n_images):
        plt.subplot(3,3,i+1)
        plt.xticks([])
        plt.yticks([])
        plt.title(f'{label} - Index: {image_index[i]}') 
        plt.grid(False)
        # OpenCV loads images as BGR; matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(all_images[image_index[i]], cv2.COLOR_BGR2RGB))
    plt.show()
    print(" ")
    

In [None]:
# Show the first 25 images in a 5x5 grid, each titled with its label
plt.figure(figsize=(10, 10))
for position in range(1, 26):
    plt.subplot(5, 5, position)
    plt.title(labels[position - 1])
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    # OpenCV stores pixels as BGR, matplotlib draws RGB.
    rgb_image = cv2.cvtColor(all_images[position - 1], cv2.COLOR_BGR2RGB)
    plt.imshow(rgb_image)

# 3.Train test split

In [None]:
# Train / validation / test split on file paths with one-hot encoded targets
X = raw_input['path'].to_numpy()
y = to_categorical(raw_input['category'], num_classes=10)

# Step 1: hold out 9.5% of all samples as the test set.
X_2, X_test, y_2, y_test = train_test_split(
    X,
    y,
    test_size=0.095,
    random_state=42,
    shuffle=True,
    stratify=y,
)

# Step 2: split what is left 80/20 into training and validation sets.
X_train, X_val, y_train, y_val = train_test_split(
    X_2,
    y_2,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=y_2,
)

In [None]:
# Report how many image paths ended up in each of the three splits
print(f'X_train has {len(X_train)} images; X_val has {len(X_val)} images; X_test has {len(X_test)} images')

# 4.Prepare Tensorflow Dataset

In [None]:
# Prepare Data

def load_dataset(filepath,label):
    """Wrap file paths and labels in a tf.data.Dataset of (filepath, label) pairs.

    Args:
        filepath: Collection of image file paths.
        label: Labels aligned element-wise with ``filepath``.

    Returns:
        A tf.data.Dataset that yields one (filepath, label) pair per element.
    """
    return tf.data.Dataset.from_tensor_slices((filepath, label))


def decode_image(filepath, label=None):
    """Read an image file and turn it into a resized float32 tensor.

    The file is decoded as a 3-channel image, cast to float32, divided by 255
    and resized to (IMAGE_SIZE, IMAGE_SIZE).

    Args:
        filepath: Path of the image file to read.
        label: Optional label that is passed through untouched.

    Returns:
        The image tensor alone when ``label`` is None, otherwise the tuple
        (image, label).
    """
    raw_bytes = tf.io.read_file(filepath)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    pixels = tf.cast(pixels, tf.float32) / 255.0
    pixels = tf.image.resize(pixels, (IMAGE_SIZE, IMAGE_SIZE))

    if label is not None:
        return pixels, label
    return pixels

def data_augment(image, label=None):
    """Randomly flip an image left-right and then up-down.

    Args:
        image: Image tensor to augment.
        label: Optional label that is passed through untouched.

    Returns:
        The flipped image alone when ``label`` is None, otherwise the tuple
        (image, label).
    """
    flipped = tf.image.random_flip_up_down(
        tf.image.random_flip_left_right(image)
    )
    return flipped if label is None else (flipped, label)

## 4.1.Rotation Transformation 

In [None]:
# Get idea from https://www.kaggle.com/code/cdeotte/rotation-augmentation-gpu-tpu-0-96

def get_mat(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    """Return the 3x3 transform matrix that transforms pixel indices.

    The result is the product (rotation @ shear) @ (zoom @ shift).

    Args:
        rotation: Rotation angle in degrees.
        shear: Shear angle in degrees.
        height_zoom: Zoom factor along the height; enters the matrix as 1/height_zoom.
        width_zoom: Zoom factor along the width; enters the matrix as 1/width_zoom.
        height_shift: Shift along the height.
        width_shift: Shift along the width.

        The caller in this notebook passes every argument as a float32 tensor
        of shape [1]; the concatenations below rely on that shape.

    Returns:
        A 3x3 float32 tensor.
    """
    def _matrix_3x3(entries):
        # Nine shape-[1] tensors, listed row by row, become one 3x3 matrix.
        return tf.reshape(tf.concat(entries, axis=0), [3, 3])

    one = tf.constant([1], dtype='float32')
    zero = tf.constant([0], dtype='float32')

    # Angles arrive in degrees; convert them to radians first.
    rotation_rad = math.pi * rotation / 180.
    shear_rad = math.pi * shear / 180.

    # Rotation
    cos_rot = tf.math.cos(rotation_rad)
    sin_rot = tf.math.sin(rotation_rad)
    rotation_matrix = _matrix_3x3([cos_rot, sin_rot, zero,
                                   -sin_rot, cos_rot, zero,
                                   zero, zero, one])

    # Shear
    cos_shear = tf.math.cos(shear_rad)
    sin_shear = tf.math.sin(shear_rad)
    shear_matrix = _matrix_3x3([one, sin_shear, zero,
                                zero, cos_shear, zero,
                                zero, zero, one])

    # Zoom
    zoom_matrix = _matrix_3x3([one / height_zoom, zero, zero,
                               zero, one / width_zoom, zero,
                               zero, zero, one])

    # Shift
    shift_matrix = _matrix_3x3([one, zero, height_shift,
                                zero, one, width_shift,
                                zero, zero, one])

    rotate_then_shear = K.dot(rotation_matrix, shear_matrix)
    zoom_then_shift = K.dot(zoom_matrix, shift_matrix)
    return K.dot(rotate_then_shear, zoom_then_shift)

In [None]:
def transform(image,label):
    """Randomly rotate, shear, zoom and shift a single image.

    Args:
        image: One image of size [IMAGE_SIZE, IMAGE_SIZE, 3], not a batch of
            [b, dim, dim, 3].
        label: Label of the image; returned unchanged.

    Returns:
        Tuple (image, label) where the image has been randomly rotated,
        sheared, zoomed and shifted and has shape [DIM, DIM, 3].
    """
    # input image - is one image of size [dim,dim,3] not a batch of [b,dim,dim,3]
    # output - image randomly rotated, sheared, zoomed, and shifted
    DIM = IMAGE_SIZE
    XDIM = DIM%2 #fix for size 331
    
    # Draw the random parameters: every value is a shape-[1] float32 tensor.
    # rot/shr are angles in degrees (get_mat converts them to radians),
    # zooms are centred on 1.0, shifts are centred on 0.
    rot = 15. * tf.random.normal([1],dtype='float32')
    shr = 5. * tf.random.normal([1],dtype='float32') 
    h_zoom = 1.0 + tf.random.normal([1],dtype='float32')/10.
    w_zoom = 1.0 + tf.random.normal([1],dtype='float32')/10.
    h_shift = 16. * tf.random.normal([1],dtype='float32') 
    w_shift = 16. * tf.random.normal([1],dtype='float32') 
  
    # GET TRANSFORMATION MATRIX
    m = get_mat(rot,shr,h_zoom,w_zoom,h_shift,w_shift) 

    # LIST DESTINATION PIXEL INDICES
    # x and y enumerate every pixel position relative to the image centre;
    # z is a row of ones so the 3x3 matrix can also apply the shift.
    x = tf.repeat( tf.range(DIM//2,-DIM//2,-1), DIM )
    y = tf.tile( tf.range(-DIM//2,DIM//2),[DIM] )
    z = tf.ones([DIM*DIM],dtype='int32')
    idx = tf.stack( [x,y,z] )
    
    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    # The transformed coordinates are cast back to integers and clipped so the
    # lookup below stays within the centred index range.
    idx2 = K.dot(m,tf.cast(idx,dtype='float32'))
    idx2 = K.cast(idx2,dtype='int32')
    idx2 = K.clip(idx2,-DIM//2+XDIM+1,DIM//2)
    
    # FIND ORIGIN PIXEL VALUES           
    # Convert centred coordinates back to (row, column) indices into `image`
    # and gather one source pixel per destination pixel.
    idx3 = tf.stack( [DIM//2-idx2[0,], DIM//2-1+idx2[1,]] )
    d = tf.gather_nd(image,tf.transpose(idx3))
        
    return tf.reshape(d,[DIM,DIM,3]),label

## 4.2.Prepare Train/Test/Valid Dataset

In [None]:
def get_training_dataset(dataset,do_aug=True):
    """Turn a (filepath, label) dataset into batched training data.

    Images are decoded and cached, then randomly flipped on every pass.
    When ``do_aug`` is true the random rotation/shear/zoom/shift from
    ``transform`` is applied as well. Batches have exactly BATCH_SIZE
    elements; a smaller final batch is dropped.

    Args:
        dataset: tf.data.Dataset of (filepath, label) pairs.
        do_aug: Whether to apply ``transform`` after the flips.

    Returns:
        The batched tf.data.Dataset.
    """
    # Cache sits before the random ops so only the decoding is cached.
    pipeline = dataset.map(decode_image).cache().map(data_augment)
    if do_aug:
        pipeline = pipeline.map(transform)
    # Shuffling is intentionally left disabled here: pipeline.shuffle(128)
    return pipeline.batch(BATCH_SIZE, drop_remainder=True)

def get_validation_dataset(dataset):
    """Decode, cache and batch a (filepath, label) dataset for validation.

    No augmentation is applied and the final partial batch is kept.
    """
    return dataset.map(decode_image).cache().batch(BATCH_SIZE)

def get_test_dataset(dataset):
    """Decode, cache and batch a (filepath, label) dataset for testing.

    No augmentation is applied and the final partial batch is kept.
    """
    decoded = dataset.map(decode_image)
    return decoded.cache().batch(BATCH_SIZE)

In [None]:
# Display Augmentation Example: one training image, transformed row*col times

row, col = 4, 4

# Take the first training image (already decoded and randomly flipped, but
# without the geometric transform) ...
all_elements = get_training_dataset(load_dataset(X_train, y_train), do_aug=False).unbatch()
one_element = tf.data.Dataset.from_tensors(next(iter(all_elements)))
# ... and repeat it endlessly so every copy gets its own random transform.
augmented_element = one_element.repeat().map(transform).batch(row * col)

# Only the first batch is needed for the figure.
img, label = next(iter(augmented_element))
plt.figure(figsize=(10, int(10 * row / col)))
for j in range(row * col):
    plt.subplot(row, col, j + 1)
    plt.axis('off')
    plt.imshow(img[j,])
plt.show()

## 4.3.Get Train/Test/Valid Dataset

In [None]:
# train_dataset = get_training_dataset(load_dataset(X_train, y_train),do_aug=True)

# valid_dataset = get_validation_dataset(load_dataset(X_val, y_val))

# test_dataset =  get_test_dataset(load_dataset(X_test, y_test))

# 5.Build Model

## 5.1.VGG16 - From Scratch - 224*224

In [None]:
# # input

# input = Input(shape =(IMAGE_SIZE,IMAGE_SIZE,3))
# # 1st Conv Block

# x = Conv2D (filters =64, kernel_size =3, padding ='same', activation='relu')(input)
# x = Conv2D (filters =64, kernel_size =3, padding ='same', activation='relu')(x)
# x = MaxPool2D(pool_size =2, strides =2, padding ='same')(x)
# # 2nd Conv Block

# x = Conv2D (filters =128, kernel_size =3, padding ='same', activation='relu')(x)
# x = Conv2D (filters =128, kernel_size =3, padding ='same', activation='relu')(x)
# x = MaxPool2D(pool_size =2, strides =2, padding ='same')(x)
# # 3rd Conv block

# x = Conv2D (filters =256, kernel_size =3, padding ='same', activation='relu')(x)
# x = Conv2D (filters =256, kernel_size =3, padding ='same', activation='relu')(x)
# x = Conv2D (filters =256, kernel_size =3, padding ='same', activation='relu')(x)
# x = MaxPool2D(pool_size =2, strides =2, padding ='same')(x)
# # 4th Conv block

# x = Conv2D (filters =512, kernel_size =3, padding ='same', activation='relu')(x)
# x = Conv2D (filters =512, kernel_size =3, padding ='same', activation='relu')(x)
# x = Conv2D (filters =512, kernel_size =3, padding ='same', activation='relu')(x)
# x = MaxPool2D(pool_size =2, strides =2, padding ='same')(x)

# # 5th Conv block

# x = Conv2D (filters =512, kernel_size =3, padding ='same', activation='relu')(x)
# x = Conv2D (filters =512, kernel_size =3, padding ='same', activation='relu')(x)
# x = Conv2D (filters =512, kernel_size =3, padding ='same', activation='relu')(x)
# x = MaxPool2D(pool_size =2, strides =2, padding ='same')(x)
# # Fully connected layers

# x = Flatten()(x)
# x = Dense(units = 4096, activation ='relu')(x)
# x = Dense(units = 4096, activation ='relu')(x)
# output = Dense(units = 10, activation ='softmax')(x)
# # creating the model

# model = Model (inputs=input, outputs =output)
# model.summary()

In [None]:
# model.compile(
#                 optimizer = 'Adam',
#                 loss = 'categorical_crossentropy',
#                 metrics=['categorical_accuracy']
#               )

In [None]:
# early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 5)
# history = model.fit(train_dataset,validation_data=valid_dataset,epochs=30, callbacks = [early_stopping])

## 5.2.VGG16 - Transfer Learning - 224*224

In [None]:
# base_model = tf.keras.applications.VGG16(weights = 'imagenet', include_top = False, input_shape = (IMAGE_SIZE,IMAGE_SIZE,3))
# for layer in base_model.layers:
#     layer.trainable = False
# base_model.summary()

In [None]:
# model = tf.keras.Sequential([
#             base_model,
#             tf.keras.layers.Flatten(),
#             tf.keras.layers.Dense(4096, activation='relu'),
#             tf.keras.layers.Dense(4096, activation='relu'),
#             tf.keras.layers.Dense(10, activation='softmax')
#                             ])
# model.summary()

In [None]:
# model.compile(
#                 optimizer = 'Adam',
#                 loss = 'categorical_crossentropy',
#                 metrics=['categorical_accuracy']
#               )

In [None]:
# early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 5)
# history = model.fit(train_dataset,validation_data=valid_dataset,epochs=30, callbacks = [early_stopping])

In [None]:
# # Plotting Model Loss Performance
# fig = plt.figure()
# plt.plot(history.history['loss'], color='green', label='loss')
# plt.plot(history.history['val_loss'], color='orange', label='val_loss')
# fig.suptitle('Loss', fontsize=20)
# plt.legend(loc='upper right')
# plt.xlabel('Epoch', fontsize=18)
# plt.show()

In [None]:
# # Plotting Model Accuracy Performance
# fig = plt.figure()
# plt.plot(history.history['categorical_accuracy'], color='green', label='accuracy')
# plt.plot(history.history['val_categorical_accuracy'], color='orange', label='val_accuracy')
# fig.suptitle('Accuracy', fontsize=20)
# plt.legend(loc='lower right')
# plt.xlabel('Epoch', fontsize=18)
# plt.show()

In [None]:
# test_acc = model.evaluate(test_dataset)[1]
# print(f'Accuracy on Test(Unseen) Dataset is \033[1m{test_acc:.2%}\033[0m')

## 5.3.EfficientnetB7 - 224*224

In [None]:
# efnet = efn.EfficientNetB7(
#     input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
#     weights='noisy-student',
#     include_top=False,
#     pooling='avg')
# efnet.trainable = False

In [None]:
#  model = tf.keras.Sequential([
#             efnet,
#             tf.keras.layers.Dense(128, activation='relu'),
#             tf.keras.layers.Dense(10, activation='softmax')
#                             ])

In [None]:
# model.compile(
#                 optimizer = 'Adam',
#                 loss = 'categorical_crossentropy',
#                 metrics=['categorical_accuracy']
#               )
# model.summary()

In [None]:
# early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 5)
# history = model.fit(train_dataset,validation_data=valid_dataset,epochs=30, callbacks = [early_stopping])

In [None]:
# # Plotting Model Loss Performance
# fig = plt.figure()
# plt.plot(history.history['loss'], color='green', label='loss')
# plt.plot(history.history['val_loss'], color='orange', label='val_loss')
# fig.suptitle('Loss', fontsize=20)
# plt.legend(loc='upper right')
# plt.xlabel('Epoch', fontsize=18)
# plt.show()

In [None]:
# test_acc = model.evaluate(test_dataset)[1]
# print(f'Accuracy on Test(Unseen) Dataset is \033[1m{test_acc:.2%}\033[0m')

## 5.4.EfficientNetB0 - 600*600

In [None]:
# Get new dataset
# IMAGE_SIZE is a global read by decode_image and transform, so it has to be
# changed before the pipelines below are built.
IMAGE_SIZE = 600

# Training pipeline with the full augmentation (flips + random transform).
train_dataset = get_training_dataset(load_dataset(X_train, y_train),do_aug=True)

# Validation and test pipelines: decoding and batching only.
valid_dataset = get_validation_dataset(load_dataset(X_val, y_val))

test_dataset =  get_test_dataset(load_dataset(X_test, y_test))

In [None]:
# EfficientNetB0 backbone from the `efficientnet` package, loaded with the
# 'noisy-student' weights and without its classification top.
# NOTE(review): pooling='avg' presumably makes the backbone output one pooled
# feature vector per image -- confirm against the efficientnet docs.
efnet = efn.EfficientNetB0(
    input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
    weights='noisy-student',
    include_top=False,
    pooling='avg')
# Freeze the backbone: its weights are excluded from training.
efnet.trainable = False

In [None]:
 # Classifier: frozen EfficientNet backbone followed by two ReLU Dense layers
 # and a 10-way softmax output (one unit per flower category).
 # NOTE(review): this cell starts with one stray leading space. It presumably
 # only runs because the notebook front-end dedents cells; in a plain .py
 # script it would be an IndentationError -- consider removing the space.
 model = tf.keras.Sequential([
            efnet,
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(768, activation='relu'),
            tf.keras.layers.Dense(10, activation='softmax')
                            ])

In [None]:
# Compile with Adam at a learning rate of 1e-4. The targets are one-hot
# encoded, hence categorical cross-entropy and categorical accuracy.
model.compile(
                optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001),
                loss = 'categorical_crossentropy',
                metrics=['categorical_accuracy']
              )
# Print the layer / parameter overview of the assembled model.
model.summary()

In [None]:
# Train for at most 50 epochs; the EarlyStopping callback monitors val_loss
# with a patience of 5 epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 5)
# `history` keeps the per-epoch metrics that are plotted in the next cells.
history = model.fit(train_dataset,validation_data=valid_dataset,epochs=50, callbacks = [early_stopping])

In [None]:
# Training vs. validation loss per epoch
fig = plt.figure()
for metric_name, line_color in (('loss', 'green'), ('val_loss', 'orange')):
    # The history key doubles as the legend label.
    plt.plot(history.history[metric_name], color=line_color, label=metric_name)
fig.suptitle('Loss', fontsize=20)
plt.legend(loc='upper right')
plt.xlabel('Epoch', fontsize=18)
plt.show()

In [None]:
# Training vs. validation accuracy per epoch
fig = plt.figure()
accuracy_curves = (
    # (history key, line colour, legend label)
    ('categorical_accuracy', 'green', 'accuracy'),
    ('val_categorical_accuracy', 'orange', 'val_accuracy'),
)
for history_key, line_color, legend_label in accuracy_curves:
    plt.plot(history.history[history_key], color=line_color, label=legend_label)
fig.suptitle('Accuracy', fontsize=20)
plt.legend(loc='lower right')
plt.xlabel('Epoch', fontsize=18)
plt.show()

In [None]:
# Evaluate on the held-out test set. Element [1] of the result is read as the
# accuracy: the model was compiled with categorical_accuracy as its only metric.
test_acc = model.evaluate(test_dataset)[1]
print(f'Accuracy on Test(Unseen) Dataset is \033[1m{test_acc:.2%}\033[0m')

In [None]:
# Model outputs for every test image; the next cells take argmax over axis 1
# to obtain the predicted class.
predict = model.predict(test_dataset, verbose=1)

### 5.4.1.Plot Wrongly Classified Images

In [None]:
# One row per test image: its path, true label and predicted label.
# argmax over axis 1 recovers the class id, which LE maps back to the label name.
predict_result = pd.DataFrame(
    {
        "path": X_test,
        "true_label": LE.inverse_transform(np.argmax(y_test, axis=1)),
        "predict_label": LE.inverse_transform(np.argmax(predict, axis=1)),
    }
)

In [None]:
# Keep only the rows whose predicted label differs from the true label
is_misclassified = predict_result['predict_label'].ne(predict_result['true_label'])
wrong_predict = predict_result.loc[is_misclassified]
print(f'{len(wrong_predict)} Images are Wrongly Classified')

In [None]:
# Load the misclassified images from disk at their original size (BGR, as read by OpenCV)
wrong_classified_images = [
    cv2.imread(image_path) for image_path in tqdm(wrong_predict.path.values)
]

In [None]:
# One figure per misclassified image, titled with predicted and true category
print("Start Plotting Wrongly Classified Flowers")
misclassified = zip(
    wrong_classified_images,
    wrong_predict.predict_label.values,
    wrong_predict.true_label.values,
)
for bgr_image, predicted_label, true_label in misclassified:
    plt.figure(figsize=(10, 10))
    plt.xticks([])
    plt.yticks([])
    plt.title(f'Predicted Category: {predicted_label} - True Category: {true_label}')
    plt.grid(False)
    # Convert from OpenCV's BGR order to RGB for matplotlib.
    plt.imshow(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))
    plt.show()
    print(" ")
    