# setup

In [None]:
!pip install -U keras-tuner # for hyperparameter tuning

In [None]:
# Mount Google Drive inside the Colab VM at /content/drive so that files stored
# on Drive can be read and written through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#! pip install kaggle

In [None]:
#mkdir /root/kaggle

In [None]:
# ! cp /content/drive/MyDrive/FISH_CLASSIFICATION/kaggle.json ~/.kaggle/

In [None]:
# ! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# ! kaggle datasets download -d crowww/a-large-scale-fish-dataset

# Data Preparation

In [None]:
# unzip dataset
# NOTE: the extraction code below is wrapped in a triple-quoted string, so it is
# disabled: running this cell only evaluates the string and extracts nothing.
'''
import zipfile, os

local_zip = '/content/a-large-scale-fish-dataset.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/content/drive/MyDrive/FISH_CLASSIFICATION/dataset')
zip_ref.close()
'''

In [None]:
# library
import pandas as pd
import numpy as np
from pathlib import Path
import os.path

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

import tensorflow as tf

from sklearn.metrics import confusion_matrix, classification_report

from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.models import *
from tensorflow.keras.preprocessing.image import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.applications.efficientnet import *

In [None]:
# Root folder of the dataset on the mounted Drive; kept as a pathlib.Path.
img_dir = Path('/content/drive/MyDrive/FISH_CLASSIFICATION/dataset/Fish_Dataset')

In [None]:
# Recursively collect every PNG below the dataset root.
# Path.glob yields entries in an arbitrary, filesystem-dependent order, so the
# result is sorted to make the file listing deterministic between runs.
filepaths = sorted(img_dir.glob('**/*.png'))

In [None]:
# The label of an image is the name of the folder that directly contains it.
folder_names = [os.path.basename(os.path.dirname(p)) for p in filepaths]

filepaths = pd.Series(filepaths, name='Filepath').astype(str)
labels = pd.Series(folder_names, name='Label')

# One row per image: its path and its label
img_df = pd.DataFrame({'Filepath': filepaths, 'Label': labels})

# drop GT images
# GT = ground truth (folders whose name ends with 'GT')
is_ground_truth = img_df['Label'].str.endswith('GT')
img_df = img_df[~is_ground_truth]
img_df

In [None]:
# Shuffle the rows: sample(frac=1) returns every row in random order, and
# reset_index(drop=True) rebuilds a clean 0..n-1 index.
# A fixed random_state keeps the shuffle identical across kernel restarts.
img_df = img_df.sample(frac=1, random_state=42).reset_index(drop=True)
img_df

In [None]:
# Distinct class labels present in the dataframe.
img_df['Label'].unique()

In [None]:
# Number of rows (images) per class label.
img_df.Label.value_counts()

In [None]:
# Preview the first 10 rows of the dataframe: each image with its label as title.
fig, axes = plt.subplots(
    nrows=2,
    ncols=5,
    figsize=(15, 7),
    subplot_kw=dict(xticks=[], yticks=[]),
)

# ravel() flattens the 2x5 grid of axes so a single loop visits every panel
for row_idx, panel in enumerate(axes.ravel()):
    sample = img_df.loc[row_idx]
    panel.imshow(plt.imread(sample['Filepath']))
    panel.set_title(sample['Label'])

plt.tight_layout()
plt.show()

In [None]:
import cv2

# Read one sample image with OpenCV and report the shape of the resulting array.
sample_path = img_df.Filepath[1]
im = cv2.imread(sample_path)
print('shape of img', im.shape)

Split the data into 80% train, 10% validation, and 10% test.

In [None]:
# Split the data into train, validation and test sets.
train_ratio = 0.8
val_ratio = 0.1
test_ratio = 1 - train_ratio - val_ratio

# Step 1: separate the training set from a hold-out part.
# stratify keeps the class proportions the same in both parts.
train_df, holdout_df = train_test_split(
    img_df,
    test_size=1 - train_ratio,
    stratify=img_df['Label'],
    random_state=42,
)

# Step 2: divide the hold-out part between validation and test, again stratified.
val_df, test_df = train_test_split(
    holdout_df,
    test_size=test_ratio / (test_ratio + val_ratio),
    stratify=holdout_df['Label'],
    random_state=42,
)

# Every image must end up in exactly one of the three subsets.
assert len(train_df) + len(val_df) + len(test_df) == len(img_df)

print('Total image in data train:', train_df.shape)
print('Total image in data validation:', val_df.shape)
print('Total image in data test:', test_df.shape)

# Data augmentation
Data augmentation is a technique used to increase the amount of data by adding slightly modified copies of existing data, or synthetic data newly created from existing data.

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Settings of the augmenting image generator, collected in one place.
augmentation_params = dict(
    rescale=1./255,            # multiply every pixel value by 1/255
    horizontal_flip=True,      # random left/right mirroring
    vertical_flip=True,        # random up/down mirroring
    rotation_range=40,         # random rotations
    width_shift_range=0.2,     # random horizontal shifts
    height_shift_range=0.2,    # random vertical shifts
    shear_range=0.2,
    zoom_range=0.2,
    fill_mode='nearest',       # how pixels exposed by a transform are filled
    # NOTE(review): validation_split only matters when a `subset=` argument is
    # given to a flow_* call; none of the calls in the next cell passes one,
    # so this value looks unused - confirm before removing.
    validation_split=0.2,
)

aug_generator = ImageDataGenerator(**augmentation_params)

In [None]:
img_size = (224, 224)

# Validation and test images must not be randomly flipped/rotated/shifted:
# augmenting them makes the reported metrics noisy and non-repeatable.
# They are therefore only rescaled, exactly like the training images are.
eval_generator = ImageDataGenerator(rescale=1./255)

# Fix the class list once so that all three generators share the same
# label -> index mapping (sorted order is also what Keras infers by default).
class_names = sorted(train_df['Label'].unique())


def _flow_from_df(generator, dataframe, shuffle):
    """Build a dataframe iterator with the settings shared by all three subsets."""
    return generator.flow_from_dataframe(
        dataframe=dataframe,
        x_col='Filepath',
        y_col='Label',
        classes=class_names,
        target_size=img_size,
        color_mode='rgb',
        class_mode='categorical',
        shuffle=shuffle,
    )


# Only the training data is augmented and shuffled.
train_img = _flow_from_df(aug_generator, train_df, shuffle=True)

# shuffle=False keeps the batches aligned with `.classes`, which is needed when
# predictions are later compared against the true labels.
val_img = _flow_from_df(eval_generator, val_df, shuffle=False)
test_img = _flow_from_df(eval_generator, test_df, shuffle=False)

In [None]:
# len() of each iterator, printed in the order train / validation / test
for flow in (train_img, val_img, test_img):
    print(len(flow))

In [None]:
from mpl_toolkits.axes_grid1 import ImageGrid

def show_grid(image_list,nrows,ncols,label_list=None,show_labels=False,savename=None,figsize=(10,10),showaxis='off'):
    """Display images in an ``nrows`` x ``ncols`` grid.

    Parameters
    ----------
    image_list : list or array
        Either a list of images, or a 4-D array indexed as
        ``[image, row, col, channel]`` with 1 or 3 channels in the last axis.
    nrows, ncols : int
        Grid dimensions. At most ``nrows * ncols`` images are drawn.
    label_list : sequence, optional
        Title for each image, used when ``show_labels`` is true.
    show_labels : bool
        Whether to put a title above each image.
    savename : str, optional
        If given, the figure is also saved to this path.
    figsize : tuple
        Figure size passed to matplotlib.
    showaxis : str
        Value passed to ``ax.axis`` for every drawn image ('off' hides the axes).
    """
    if not isinstance(image_list, list):
        if image_list.shape[-1] == 1:
            # single-channel images: drop the channel axis so imshow gets 2-D arrays
            image_list = [image_list[i, :, :, 0] for i in range(image_list.shape[0])]
        elif image_list.shape[-1] == 3:
            image_list = [image_list[i, :, :, :] for i in range(image_list.shape[0])]

    fig = plt.figure(None, figsize, frameon=False)
    grid = ImageGrid(fig, 111,  # similar to subplot(111)
                     nrows_ncols=(nrows, ncols),
                     axes_pad=0.3,  # pad between axes in inch.
                     share_all=True,
                     )

    n_images = len(image_list)
    for i in range(nrows * ncols):
        ax = grid[i]  # the grid object works as a list of axes
        if i >= n_images:
            # fewer images than grid cells: leave the cell empty instead of failing
            ax.axis('off')
            continue
        ax.imshow(image_list[i], cmap='Greys_r')
        ax.axis(showaxis)
        if show_labels:
            if label_list is not None:
                if i < len(label_list):
                    ax.set_title(str(label_list[i]))
            else:
                # Legacy path: without label_list the titles come from the
                # module-level names `class_mapping` and `y_int`, which must
                # have been defined by the caller's notebook.
                ax.set_title(class_mapping[y_int[i]])

    if savename is not None:
        plt.savefig(savename, bbox_inches='tight')

In [None]:
# Pull one batch from the training generator: x receives the image array and
# y the matching labels.
x, y = next(train_img)

In [None]:
# Show images of the batch in a 4 x 6 grid (24 cells), without titles.
show_grid(x,4,6,label_list=None,show_labels=False,figsize=(20,12))


# CNN (no pretrained model)

## setup

In [None]:
import warnings
warnings.filterwarnings("ignore")

import os, random
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

# Print floats with 4 decimals and never abbreviate long arrays.
float_formatter = lambda x: '%.4f' % x
np.set_printoptions(formatter={'float_kind':float_formatter})
np.set_printoptions(threshold=np.inf, suppress=True, precision=4)

# matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and the
# old names were removed later; use whichever name this installation provides.
_colorblind_style = "seaborn-v0_8-colorblind"
if _colorblind_style not in plt.style.available:
    _colorblind_style = "seaborn-colorblind"
plt.style.use(_colorblind_style)
plt.rcParams["figure.figsize"] = (8, 6)
sns.set_style("darkgrid")
#sns.set_context("talk")
sns.set_context(context='notebook', font_scale=1.25)
sns.set_style({"font.sans-serif": ["Verdana", "Arial", "Calibri", "DejaVu Sans"]})

# NOTE: It is important that you set a seed value to get same results in every run.
# Any number is Ok.
seed = 123
random.seed(seed)
np.random.seed(seed)
# TensorFlow keeps its own random generator (weight initialisation, dropout),
# so it has to be seeded as well.
tf.random.set_seed(seed)

In [None]:
import keras.backend as K
import tensorflow as tf
from keras.layers import (Input, Dense, Dropout, Conv2D, MaxPooling2D, Activation,
                          BatchNormalization, Flatten)
from keras.models import Model
from keras import regularizers
from keras.callbacks import ReduceLROnPlateau, EarlyStopping


# Clear the Keras backend session before any model of this section is built.
K.clear_session()   # start afresh each time!!

In [None]:
def build_model(use_l2_reg=False, use_dropout=False, lr=0.0001,
                l2_loss_lambda=0.0015, num_classes=9,
                input_shape=(224, 224, 3)):
  """Build and compile the plain CNN classifier.

  The network has three Conv2D(3x3, relu) + 2x2 max-pool stages with 32, 64 and
  128 filters, followed by Flatten, Dense(256), Dense(512) and a softmax output.

  Args:
    use_l2_reg: add an L2 kernel regularizer to every conv and hidden dense layer.
    use_dropout: insert Dropout layers after each stage and hidden dense layer.
    lr: learning rate given to the Adam optimizer.
    l2_loss_lambda: L2 factor, only used when `use_l2_reg` is true.
    num_classes: number of units of the softmax output layer.
    input_shape: shape of one input image as (height, width, channels).

  Returns:
    The compiled Keras model (categorical cross-entropy loss, accuracy metric).

  Raises:
    AssertionError: if `l2_loss_lambda` is None.
  """
  from keras.optimizers import Adam

  K.clear_session()

  assert l2_loss_lambda is not None

  l2 = regularizers.l2(l2_loss_lambda) if use_l2_reg else None

  if l2 is not None: print('Using L2 regularization %.6f' % l2_loss_lambda)

  inputs = Input(shape=input_shape)

  # (filters, dropout rate) of the three convolutional stages
  x = inputs
  for n_filters, drop_rate in ((32, 0.15), (64, 0.2), (128, 0.3)):
    x = Conv2D(filters=n_filters, kernel_size=(3, 3), padding='same',
               kernel_regularizer=l2, activation='relu')(x)
    x = MaxPool2D(pool_size=(2, 2))(x)
    if use_dropout: x = Dropout(drop_rate)(x)

  x = Flatten()(x)
  if use_dropout: x = Dropout(0.4)(x)

  x = Dense(256, activation='relu', kernel_regularizer=l2)(x)
  if use_dropout: x = Dropout(0.4)(x)
  x = Dense(512, activation='relu', kernel_regularizer=l2)(x)
  if use_dropout: x = Dropout(0.2)(x)

  outputs = Dense(num_classes, activation='softmax')(x)
  model = Model(inputs=inputs, outputs=outputs)
  model.compile(
    # An optimizer instance is required for `lr` to take effect; the string
    # 'adam' would silently fall back to the optimizer's default learning rate.
    optimizer=Adam(learning_rate=lr),
    loss='categorical_crossentropy',
    metrics=['accuracy']
    )
  return model

In [None]:
# File that receives the best model seen during training
checkpoint_path = '/content/drive/MyDrive/FISH_CLASSIFICATION/my_model/basemodel.h5'

# Stop when val_loss has not improved for 10 epochs
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    verbose=1,
)

# Multiply the learning rate by 0.1 after 5 epochs without val_loss improvement,
# but never go below 1e-6
lr_reducer = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.1,
    patience=5,
    min_lr=0.000001,
    verbose=1,
)

# Write the whole model (not just the weights) whenever val_loss reaches a new minimum
best_checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

callbacks = [early_stopping, lr_reducer, best_checkpoint]

## Base model
A model with no regularization.

### Pre-tuned

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten
# Baseline CNN: three conv + max-pool stages, two hidden dense layers and a
# 9-way softmax output; no regularization of any kind.
model = Sequential([
    # input_shape is declared once, on the first layer only; later layers take
    # their input shape from the layer before them.
    Conv2D(32, kernel_size=3, padding='same', activation='relu', input_shape=(224, 224, 3)),
    MaxPool2D(2, 2),
    Conv2D(64, kernel_size=3, padding='same', activation='relu'),
    MaxPool2D(2, 2),
    Conv2D(128, kernel_size=3, padding='same', activation='relu'),
    MaxPool2D(2, 2),
    Flatten(),
    Dense(256, activation='relu'),
    Dense(512, activation='relu'),
    Dense(9, activation='softmax'),
])


In [None]:
# Configure training: the Adam optimizer selected by name, categorical
# cross-entropy as the loss, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [None]:
# Draw the architecture diagram of the base model into basemodel.png.
# plot_model is imported from its public location; `keras.utils.vis_utils` is
# an internal module path that newer Keras releases no longer provide.
from tensorflow.keras.utils import plot_model
plot_model(model, to_file='basemodel.png', show_shapes=True, show_layer_names=True)

In [None]:
# Train on the training generator with the validation generator as validation
# data, for up to 50 epochs, with the `callbacks` list attached.
# The returned object is kept in `history`.
history = model.fit(
    train_img,
    validation_data=val_img,
    epochs=50,
    callbacks=callbacks
)

In [None]:
# Plot the training curves, one figure for accuracy and one for loss.
# The second line of each figure comes from the `val_*` history entries, i.e.
# from the validation data passed to fit(), so it is labelled 'validation'
# rather than 'test'.
for metric in ('accuracy', 'loss'):
    fig, ax = plt.subplots()
    ax.plot(history.history[metric], label='train')
    ax.plot(history.history['val_' + metric], label='validation')
    ax.set(title='model ' + metric, ylabel=metric, xlabel='epoch')
    ax.legend(loc='upper left')
    plt.show()

In [None]:
# Reload a saved model from Drive.
# The imports of this notebook never bind the bare name `keras`, so the loader
# is reached through `tf.keras` instead.
# NOTE(review): this path (MyModel.h5) is not the file written by the
# ModelCheckpoint callback (basemodel.h5) - confirm which model is intended.
# NOTE(review): the variable shadows the `load_model` function made available by
# `from tensorflow.keras.models import *`; the name is kept because the
# following cells use it.
load_model = tf.keras.models.load_model('/content/drive/MyDrive/FISH_CLASSIFICATION/my_model/MyModel.h5')

In [None]:
# Evaluate the reloaded model on the test generator; the result holds the loss
# followed by whatever metrics the saved model was compiled with.
load_model.evaluate(test_img)

In [None]:
# Per-class evaluation on the test set: confusion matrix and classification report.
labels = list(test_img.class_indices.keys())
# True class indices in file order; this matches the prediction order only
# because the test generator is created with shuffle=False.
y_true = test_img.classes
# Model.predict accepts generators directly; predict_generator is deprecated
# and absent from newer Keras releases.
y_pred = load_model.predict(test_img)
rounded_pred = np.argmax(y_pred, axis=1)
cm = confusion_matrix(y_true=y_true, y_pred=rounded_pred)

# Drawn with seaborn, which the notebook already imports, because no
# plot_confusion_matrix helper is defined before this cell.
# NOTE(review): the title says "ResNet", yet this section is about the CNN
# without a pretrained model - confirm the intended title.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=labels, yticklabels=labels, ax=ax)
ax.set(title="ResNet Pretuned", xlabel='Predicted label', ylabel='True label')
plt.show()

report = classification_report(y_true, rounded_pred, target_names=labels)
print(report)

In [None]:
# Draw the architecture diagram of `model`.
# plot_model is imported from its public location; `keras.utils.vis_utils` is
# an internal module path that newer Keras releases no longer provide.
# NOTE(review): `model` is the CNN built earlier in this section, while the
# output file is named after a ResNet - confirm the intended file name.
from tensorflow.keras.utils import plot_model
plot_model(model, to_file='resnet_pretuned.png', show_shapes=True, show_layer_names=True)