In [1]:
!pip install tensorflow==2.1

### Installing all the libraries and dependencies

In [2]:
# Standard Imports for working with data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import scipy.io
import tarfile
import csv
import sys
import os

# Neural Network frameworks
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.models as M
import tensorflow.keras.layers as L
import tensorflow.keras.backend as K
import tensorflow.keras.callbacks as C
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint, EarlyStopping, Callback
#from tensorflow.keras.regularizers import l2
#from tensorflow.applications.inception_v3 import Inception_v3
from tensorflow.keras import optimizers

from sklearn.model_selection import train_test_split

# Image output modules
import PIL
from PIL import ImageOps, ImageFilter

# Image visual output settings
from pylab import rcParams
# Default figure size as (width, height). This has to be an assignment:
# a comparison (`==`) is evaluated and thrown away, leaving the setting untouched.
rcParams['figure.figsize'] = 10, 5
%config InlineBackend.figure_format = 'svg'
%matplotlib inline



### Setting up the configs for the future modelling

In [3]:
!pwd

In [4]:
# All the main settings for the neural networks are collected here for convenience

EPOCHS = 50  # number of epochs passed to the first (from-scratch) training run
BATCH_SIZE = 128  # images per batch produced by the data generators
LR = 1e-3  # learning rate handed to the Adamax optimizer
VAL_SPLIT = 0.2  # fraction of the samples meant to be held out from training

CLASS_NUM = 102  # number of units in the final softmax layer
IMG_SIZE = 128  # images are resized to IMG_SIZE x IMG_SIZE by the generators
IMG_CHANNELS = 3  # colour channels per image
input_shape = (IMG_SIZE, IMG_SIZE, IMG_CHANNELS)  # (height, width, channels) fed to the models

DATA_PATH = '/kaggle/input/flower-dataset-102/'  # directory holding 102flowers.tgz and imagelabels.mat
PATH = '/kaggle/working/flower/'  # directory the image archive is extracted into

### Importing the data

In [5]:
def get_filenames(tgz):
    """Return the member names of all regular files stored in a tar archive.

    Parameters
    ----------
    tgz : path of the archive, passed straight to `tarfile.open`.

    Returns
    -------
    list of str
        Names of the file members, in archive order; directories and other
        non-file members are left out.
    """
    names = []
    with tarfile.open(tgz) as archive:
        for member in archive.getmembers():
            if member.isfile():
                names.append(member.name)
    return names


# Image paths inside the archive, sorted so that they line up with the label vector
image_names = sorted(get_filenames(DATA_PATH + '102flowers.tgz'))

# Labels in the .mat file start at 1; shift them so classes run 0, ..., N-1
zero_based_labels = scipy.io.loadmat(DATA_PATH + 'imagelabels.mat')['labels'][0] - 1

df = pd.DataFrame({'Id': image_names, 'Category': zero_based_labels})
# The class column is stored as strings (the generators below use class_mode='categorical')
df['Category'] = df['Category'].astype('str')

# Quick look at the first rows to check that everything is ok
df.head(4)

In [6]:
# Number of images per class
df['Category'].value_counts()

In [7]:
# Horizontal bar chart of the number of images per class.
# The classes are imbalanced, which has to be dealt with later on.
class_counts = df['Category'].value_counts(sort=False)
class_counts.to_frame().plot(kind='barh', figsize=(10, 20));

In [8]:
# How many distinct classes the dataset contains
print(f"Dataset contains {df['Category'].nunique()} classes.")

In [9]:
# Unpack the images from the dataset archive into the working directory.
# The context manager guarantees the archive handle is closed once extraction
# is done (or if it fails half-way).
with tarfile.open(DATA_PATH + "102flowers.tgz") as opened_tgz:
    opened_tgz.extractall(PATH)

# Check the files: list a few of the extracted images
print(os.listdir(PATH + 'jpg')[:12])

In [10]:
# Show a random dozen of images from the dataset together with their class
preview = df.sample(n = 12)

plt.figure(figsize = (10, 6))

samples = zip(preview['Id'].values, preview['Category'].values)
for position, (relative_path, label) in enumerate(samples, start = 1):
    picture = PIL.Image.open(PATH + relative_path)
    plt.subplot(4, 3, position)
    plt.imshow(picture)
    plt.title("Class: " + str(label))
    plt.axis("off")

plt.show();

In [11]:
# Stratified train/test split.
# - test_size comes from the VAL_SPLIT setting instead of a second hard-coded 0.2
# - random_state is fixed so the split (and every number reported below) can be
#   reproduced after a kernel restart; 42 matches the seed given to the generators
X_train, X_test, y_train, y_test = train_test_split(df['Id'], df['Category'],
                                                    test_size = VAL_SPLIT,
                                                    stratify = df['Category'],
                                                    random_state = 42)

# flow_from_dataframe needs paths and labels side by side in one frame
X_train = pd.DataFrame(X_train)
X_test = pd.DataFrame(X_test)

X_train['Category'] = y_train
X_test['Category'] = y_test

print(f"Shape of the Train Data: {X_train.shape}\nShape of the Test Data: {X_test.shape}")

### Data Augmentation and Generator Creation

In [12]:
# Augmentation matters a lot when there is not much data to train the model on.
# Pixel values are scaled by 1/255 for both the training and the test images.
pixel_scale = 1. / 255

# Random transformations applied to training images only
augmentation_options = {
    'rotation_range': 50,
    'shear_range': 0.2,
    'zoom_range': [0.75, 1.25],
    'brightness_range': [0.5, 1.5],
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'horizontal_flip': True,
}

train_datagen = ImageDataGenerator(rescale = pixel_scale, **augmentation_options)

# Test images are only rescaled, never augmented
test_datagen = ImageDataGenerator(rescale = pixel_scale)

In [13]:
# Wrap both dataframes into batch generators.
# Everything except the dataframe and the shuffle flag is shared between the two.
flow_options = dict(directory = PATH,
                    x_col = 'Id',
                    y_col = "Category",
                    target_size = (IMG_SIZE, IMG_SIZE),
                    batch_size = BATCH_SIZE,
                    class_mode = 'categorical',
                    seed = 42)

# Training batches are shuffled
train_generator = train_datagen.flow_from_dataframe(dataframe = X_train, shuffle = True, **flow_options)

# Test batches keep the dataframe order
test_generator = test_datagen.flow_from_dataframe(dataframe = X_test, shuffle = False, **flow_options)


In [14]:
from skimage import io

def imshow(image):
    """Display `image` with scikit-image's `io` helpers.

    Forwards `image` unchanged to `io.imshow` and then calls `io.show()`.
    Presumably `image` is an array-like picture — TODO confirm against callers
    (no call to this helper is visible in this notebook section).
    """
    io.imshow(image)
    io.show()
    
# Pull one augmented batch from the training generator and show its first nine images
x, y = train_generator.next()
print("Image Train example:")
plt.figure(figsize = (10, 10))

# The loop deliberately avoids a variable called `image`: that name is already
# bound to the `tensorflow.keras.preprocessing.image` module imported at the top,
# and rebinding it here would silently clobber the import for later cells.
for slot in range(9):
    plt.subplot(3, 3, slot + 1)
    plt.imshow(x[slot])

plt.show();

### Neural Network Modelling

In [15]:
# Display the input shape the models are built for
input_shape

In [16]:
model = M.Sequential()

# Three convolutional stages. Each stage is Conv -> BatchNorm -> Conv -> BatchNorm,
# followed by 2x2 max-pooling and dropout.
# Tuples are (number of filters, dropout rate applied after pooling).
conv_stages = [(128, 0.2), (256, 0.3), (512, 0.3)]

for stage_no, (n_filters, drop_rate) in enumerate(conv_stages):
    # Only the very first layer of a Sequential model needs `input_shape`;
    # every later layer infers its input from the layer before it.
    first_layer_kwargs = {'input_shape': input_shape} if stage_no == 0 else {}

    model.add(L.Convolution2D(n_filters, (3, 3), activation = 'elu', padding = 'same', **first_layer_kwargs))
    model.add(L.BatchNormalization())
    model.add(L.Convolution2D(n_filters, (3, 3), activation = 'elu', padding = 'same'))
    model.add(L.BatchNormalization())

    # Pooling and regularisation that close the stage
    model.add(L.MaxPooling2D(pool_size = (2, 2)))
    model.add(L.Dropout(drop_rate))

# Flatten the feature maps coming out of the last convolutional stage
model.add(L.Flatten())

# Fully connected head ending in one softmax unit per class
model.add(L.Dense(512, activation = 'elu'))
model.add(L.Dropout(0.25))
model.add(L.Dense(CLASS_NUM, activation = 'softmax'))


In [17]:
# Layer-by-layer overview of the CNN built above
model.summary()

### Model Training

In [18]:
# Reset the global state kept by Keras.
# NOTE(review): this runs AFTER `model` has already been built in an earlier cell and
# before it is compiled; clearing the session is normally done before constructing a
# model — confirm this ordering is intended.
tf.keras.backend.clear_session()

In [19]:
# Compile with the learning rate from the settings cell (LR).
# `learning_rate` is the canonical argument name; `lr` is only kept by Keras as a
# backward-compatibility alias.
model.compile(loss = 'categorical_crossentropy', optimizer = optimizers.Adamax(learning_rate = LR), metrics = ['accuracy'])

In [20]:
# Checkpoint: write 'best_model.hdf5' only when val_accuracy improves.
# - `monitor` must be the metric name as a string, not a list
# - without `save_best_only = True` the file is overwritten after every epoch and
#   would simply hold the most recent model rather than the best one
checkpoint = ModelCheckpoint('best_model.hdf5', monitor = 'val_accuracy', verbose = 1, mode = 'max',
                             save_best_only = True)

# Stop once val_accuracy has not improved for 5 epochs and roll back to the best weights
earlystop = EarlyStopping(monitor = 'val_accuracy', patience = 5, restore_best_weights = True)
callbacks_list = [checkpoint, earlystop]

In [21]:
# Baseline accuracy of the untrained model.
# Evaluate over every batch of the test generator: with `steps = 1` only the first
# batch would be scored, which is not the accuracy on the test set.
scores = model.evaluate_generator(test_generator, steps = len(test_generator), verbose = 1)
print("Accuracy: %.2f%%" % (scores[1] * 100))

In [22]:
# Training
# validation_steps = len(test_generator) covers the whole validation set, including the
# last partial batch; floor division would drop those samples from val_accuracy,
# the metric that drives both the checkpoint and early stopping.
history = model.fit_generator(train_generator,
                              steps_per_epoch = train_generator.samples // train_generator.batch_size,
                              validation_data = test_generator,
                              validation_steps = len(test_generator),
                              epochs = EPOCHS,
                              callbacks = callbacks_list)

### Transfer Learning.

In [23]:
# ResNet50V2 convolutional base with ImageNet weights and without its classification top,
# built for the notebook's input shape.
# NOTE(review): the generators rescale pixels by 1/255; the ImageNet weights presumably
# expect the scaling done by the matching `preprocess_input` — confirm whether that matters here.
base_model = tf.keras.applications.ResNet50V2(weights = 'imagenet', include_top = False, input_shape = input_shape)

In [24]:
# Layer-by-layer overview of the pretrained base
base_model.summary()

In [25]:
# Transfer-learning model: pretrained base -> global average pooling -> softmax classifier.
# (An extra Dense(512, 'elu') + BatchNormalization head is left disabled.)
model = M.Sequential([
    base_model,
    L.GlobalAveragePooling2D(),
    L.Dense(CLASS_NUM, activation = 'softmax'),
])

In [26]:
# Overview of the transfer-learning model
model.summary()

In [27]:
# Compile the transfer-learning model. `learning_rate` is the canonical argument name;
# `lr` is only kept by Keras as a backward-compatibility alias.
model.compile(loss = 'categorical_crossentropy', optimizer = optimizers.Adamax(learning_rate = LR), metrics = ['accuracy'])

In [28]:
# Checkpoint: write 'best_model.hdf5' only when val_accuracy improves.
# - `monitor` must be the metric name as a string, not a list
# - without `save_best_only = True` the file is overwritten after every epoch and
#   would simply hold the most recent model rather than the best one
checkpoint = ModelCheckpoint('best_model.hdf5', monitor = 'val_accuracy', verbose = 1, mode = 'max',
                             save_best_only = True)

# Stop once val_accuracy has not improved for 5 epochs and roll back to the best weights
earlystop = EarlyStopping(monitor = 'val_accuracy', patience = 5, restore_best_weights = True)
callbacks_list = [checkpoint, earlystop]

In [29]:
# Accuracy of the transfer-learning model before any fine-tuning.
# Evaluate over every batch of the test generator: with `steps = 1` only the first
# batch would be scored, which is not the accuracy on the test set.
scores = model.evaluate_generator(test_generator, steps = len(test_generator), verbose = 1)
print("Accuracy: %.2f%%" % (scores[1] * 100))

In [30]:
# Fine-tune the transfer-learning model.
# validation_steps = len(test_generator) covers the whole validation set, including the
# last partial batch; floor division would drop those samples from val_accuracy,
# the metric that drives both the checkpoint and early stopping.
history = model.fit_generator(train_generator,
                              steps_per_epoch = train_generator.samples // train_generator.batch_size,
                              validation_data = test_generator,
                              validation_steps = len(test_generator),
                              epochs = 25,
                              callbacks = callbacks_list)

In [31]:
# Save the full model in the state it has right after training
model.save('/kaggle/working/model_last.hdf5')
# Then load the weights from the file written by the ModelCheckpoint callback
model.load_weights('best_model.hdf5')

In [32]:
# Final accuracy with the weights loaded from the checkpoint file.
# Evaluate over every batch of the test generator: with `steps = 1` the reported
# number would describe only the first batch instead of the whole test set.
scores = model.evaluate_generator(test_generator, steps = len(test_generator), verbose = 1)
print("Accuracy: %.2f%%" % (scores[1] * 100))

In [33]:
def plot_history(history):
    """Plot training vs. validation accuracy and loss, one figure each.

    Parameters
    ----------
    history : object with a `.history` dict containing the keys 'accuracy',
        'val_accuracy', 'loss' and 'val_loss' (one value per epoch), such as the
        object returned by the fit calls in this notebook.

    Raises
    ------
    KeyError
        If one of the four keys is missing from `history.history`.

    Notes
    -----
    Switches matplotlib to the 'dark_background' style; the style stays active
    after the call.
    """
    # Activate the style BEFORE any figure is created: a figure keeps the colours that
    # were in force at creation time, so styling afterwards leaves the first figure
    # with a background that does not match its axes and labels.
    plt.style.use('dark_background')

    metrics = history.history
    acc = metrics['accuracy']
    val_acc = metrics['val_accuracy']
    loss = metrics['loss']
    val_loss = metrics['val_loss']

    epochs = range(len(acc))

    # Figure 1: accuracy curves
    plt.figure(figsize = (10, 5))
    plt.plot(epochs, acc, 'b', label = 'Training accuracy')
    plt.plot(epochs, val_acc, 'r', label = 'Validation accuracy')
    plt.title("Training and validation accuracy")
    plt.legend()

    # Figure 2: loss curves
    plt.figure(figsize = (10,5))
    plt.plot(epochs, loss, 'b', label = 'Training loss')
    plt.plot(epochs, val_loss, 'r', label = 'Validation loss')
    plt.title('Training and Validation loss')
    plt.legend()

    plt.show()

In [34]:
# Curves for the most recent training run (`history` was last assigned by the transfer-learning fit)
plot_history(history)