In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import keras
import os
from tensorflow.keras.preprocessing.image import load_img, ImageDataGenerator, img_to_array
from keras.applications.inception_v3 import preprocess_input
from keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Flatten
from tensorflow.keras.applications.inception_v3 import InceptionV3
from glob import glob
from dask import bag
from dask.diagnostics import ProgressBar
from PIL import Image
from tensorflow.keras.preprocessing import image
from keras.utils import image_dataset_from_directory
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications.resnet50 import preprocess_input,decode_predictions
from IPython.display import SVG, Image
import matplotlib.cm as cm
from IPython.display import Image, display
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from keras import regularizers
from tensorflow.keras import layers
from keras.models import load_model
%matplotlib inline

EDA

In [None]:
# Root folders of the image dataset, relative to the notebook's working directory.
# Later cells read one sub-folder per class (caribou, deer, elk, moose) under the training root.
TRAIN_PATH = 'dataset/train/'
TEST_PATH = 'dataset/test/'

1) Check amount of data and data classes

In [None]:
# Load the whole training folder once, only to inspect the class names that
# image_dataset_from_directory derives from the sub-folder names.
train_ds = image_dataset_from_directory(TRAIN_PATH, image_size=(224, 224), seed=123)

print(train_ds.class_names)

In [None]:
# Keep the class-name list and print the label tensor of a single batch
# to see how the labels are encoded.
class_names = train_ds.class_names
for images, labels in train_ds.take(1):
    # Disabled example: map one label of the batch back to its class name.
    # print(class_names[labels[2]])
    print(labels)

Identify Class Imbalance: Number of Training Images per Class

In [None]:
# Count the files in each training class folder; keys are the display names
# used on the bar chart, folder names are their lower-case form.
number_train_classes = {
    label: len(os.listdir('dataset/train/' + label.lower()))
    for label in ('Caribou', 'Deer', 'Elk', 'Moose')
}

In [None]:
# Bar chart of the per-class training image counts (explicit Axes interface).
count_fig, count_ax = plt.subplots()
count_ax.bar(list(number_train_classes.keys()), list(number_train_classes.values()), width=0.5)
count_ax.set_title("Number of Images by Train Class")
count_ax.set_xlabel('Class Name')
count_ax.set_ylabel('# Images');

Identify Class Imbalance: Number of Test Images per Class

In [None]:
# Count the files in each *test* class folder.
# The previous version listed 'dataset/train/...' here, so the "test" chart
# silently repeated the training counts.
number_test_classes = {
    label: len(os.listdir(os.path.join(TEST_PATH, label.lower())))
    for label in ('Caribou', 'Deer', 'Elk', 'Moose')
}

In [None]:
# Bar chart of the per-class test image counts (explicit Axes interface).
count_fig, count_ax = plt.subplots()
count_ax.bar(list(number_test_classes.keys()), list(number_test_classes.values()), width=0.5)
count_ax.set_title("Number of Images by Test Class")
count_ax.set_xlabel('Class Name')
count_ax.set_ylabel('# Images');

Plotting Image Sizes

Training Dataset

In [None]:
from keras.utils import image_dataset_from_directory
import matplotlib.pyplot as plt
import os
from PIL import Image
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import keras
from tensorflow.keras.preprocessing.image import load_img, ImageDataGenerator, img_to_array
from keras.models import Sequential
from keras.layers import Dense, Flatten, BatchNormalization, Dropout
from glob import glob
from dask import bag
from dask.diagnostics import ProgressBar
%matplotlib inline

In [None]:
# Record the pixel dimensions of every file found under TRAIN_PATH.
# One record per file is appended to a list: keying a dict by the bare file
# name would silently drop images that share a name across class folders.
train_size_records = []
for dirpath, _, filenames in os.walk(TRAIN_PATH):
    for path_image in filenames:
        # Named `image_path` (not `image`) so the keras `image` module imported
        # in the first cell is not shadowed by a string.
        image_path = os.path.abspath(os.path.join(dirpath, path_image))
        with Image.open(image_path) as img:
            width, height = img.size
        train_size_records.append(
            {'index': path_image, 'width': width, 'height': height, 'path': dirpath})

# Same column layout as before: file name in 'index', then width/height/path.
train_size_images = pd.DataFrame(train_size_records, columns=['index', 'width', 'height', 'path'])
# 'dataset/train/<class>' -> folder / subfolder / class
train_size_images[['folder','subfolder','class']] = train_size_images['path'].str.split('/',n=3,expand=True)

In [None]:
# One width-vs-height scatter panel per class found in the training size table.
class_lst = train_size_images['class'].unique().tolist()
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(10, 5))
fig.tight_layout(pad=4.0)
ax = ax.ravel()
for panel_idx, class_name in enumerate(class_lst):
    class_rows = train_size_images[train_size_images['class'] == class_name]
    panel = ax[panel_idx]
    panel.plot(class_rows['width'], class_rows['height'], 'o')
    panel.set_xlabel('Width')
    panel.set_ylabel('Height')
    panel.set_title(class_name)
fig.suptitle("Train Dataset Image's Size", fontsize=15)

Testing Dataset

In [None]:
# Record the pixel dimensions of every file found under TEST_PATH.
# One record per file is appended to a list: keying a dict by the bare file
# name would silently drop images that share a name across class folders.
test_size_records = []
for dirpath, _, filenames in os.walk(TEST_PATH):
    for path_image in filenames:
        # Named `image_path` (not `image`) so the keras `image` module imported
        # in the first cell is not shadowed by a string.
        image_path = os.path.abspath(os.path.join(dirpath, path_image))
        with Image.open(image_path) as img:
            width, height = img.size
        test_size_records.append(
            {'index': path_image, 'width': width, 'height': height, 'path': dirpath})

# Same column layout as before: file name in 'index', then width/height/path.
test_size_df = pd.DataFrame(test_size_records, columns=['index', 'width', 'height', 'path'])
# 'dataset/test/<class>' -> folder / subfolder / class
test_size_df[['folder','subfolder','class']] = test_size_df['path'].str.split('/',n=3,expand=True)

In [None]:
# One width-vs-height scatter panel per class found in the test size table.
class_lst = test_size_df['class'].unique().tolist()
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(10, 5))
fig.tight_layout(pad=4.0)
ax = ax.ravel()
for panel_idx, class_name in enumerate(class_lst):
    class_rows = test_size_df[test_size_df['class'] == class_name]
    panel = ax[panel_idx]
    panel.plot(class_rows['width'], class_rows['height'], 'o')
    panel.set_xlabel('Width')
    panel.set_ylabel('Height')
    panel.set_title(class_name)
fig.suptitle("Test Dataset Image's Size", fontsize=15)

Check Image color types

In [None]:
# Map each class name to its training folder.
# os.path.join avoids the doubled separator that `TRAIN_PATH + '/caribou/'`
# produced, since TRAIN_PATH already ends with '/'.
directories = {
    animal: os.path.join(TRAIN_PATH, animal)
    for animal in ('caribou', 'deer', 'elk', 'moose')
}

In [None]:
def get_image_type(image_path):
    """
    Classify an image file by the shape of its decoded pixel array.

    Parameters
    ----------
    image_path : str
        Path of the image file to open.

    Returns
    -------
    str
        'grayscale' for a 2-D array, 'rgb' for a 3-D array with exactly three
        channels, 'other' for anything else (e.g. RGBA).
    """
    # The context manager closes the file handle as soon as the pixels are
    # decoded; the previous version left one open handle per inspected image.
    with Image.open(image_path) as im:
        arr = np.array(im)
    # NOTE(review): any image that decodes to a 2-D array is reported as
    # 'grayscale'; confirm palette-based files are not miscounted this way.
    if arr.ndim == 2:
        return 'grayscale'
    if arr.ndim == 3 and arr.shape[2] == 3:
        return 'rgb'
    return 'other'
 
def count_image_types_and_get_grayscale_filenames(directory_path):
    """
    Tally the image types of every entry in a directory.

    Each entry returned by os.listdir(directory_path) is classified with
    get_image_type; entries classified as anything other than 'grayscale'
    or 'rgb' are not counted.

    Returns a tuple (grayscale_count, rgb_count, grayscale_filenames), where
    grayscale_filenames lists the file names classified as 'grayscale'.
    """
    grayscale_filenames = []
    rgb_count = 0

    for fname in os.listdir(directory_path):
        kind = get_image_type(os.path.join(directory_path, fname))
        if kind == 'rgb':
            rgb_count += 1
        elif kind == 'grayscale':
            grayscale_filenames.append(fname)

    # The grayscale count is exactly the number of collected file names.
    return len(grayscale_filenames), rgb_count, grayscale_filenames
 
# Report the grayscale/RGB tally for every class folder in `directories`.
for animal, dir_path in directories.items():
    grayscale_count, rgb_count, grayscale_files = count_image_types_and_get_grayscale_filenames(dir_path)
    print(
        f"For {animal}:",
        f"Number of grayscale images: {grayscale_count}",
        f"Number of RGB images: {rgb_count}",
        f"Filenames of grayscale images: {grayscale_files}",
        "-" * 50,
        sep="\n",
    )

Data Preprocessing: Reading Images for Training and Validation

In [None]:
image_size = (224, 224)
batch_size = 32

# Both subsets must share the split fraction and seed so that the training and
# validation partitions are complementary.
split_kwargs = dict(
    validation_split=0.1,
    label_mode='int',
    seed=123,
    image_size=image_size,
    batch_size=batch_size,
)

# 90% of the images under TRAIN_PATH.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    TRAIN_PATH, subset="training", **split_kwargs)
print(train_ds.class_names)

# Remaining 10% held out for validation.
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    TRAIN_PATH, subset="validation", **split_kwargs)
print(val_ds.class_names)

In [None]:
# Load the test folder at the same 224x224 resolution as the training data.
test_ds = image_dataset_from_directory('dataset/test/', image_size=(224, 224), seed=123)

print(test_ds.class_names)

Data Augmentation

In [None]:
# Training-time augmentation pipeline: random flip, shift, rotation, zoom and
# brightness jitter, followed by scaling the pixel values by 1/255.
augmentation_layers = [
    tf.keras.Input(shape=(224,224,3)),
    tf.keras.layers.RandomFlip(mode='horizontal', name='rand_flip'),
    tf.keras.layers.RandomTranslation(
        height_factor=0.2, width_factor=0.2, fill_mode='nearest', name='rand_trans'),
    tf.keras.layers.RandomRotation(factor=0.1, fill_mode='nearest', name='rand_rot'),
    tf.keras.layers.RandomZoom(height_factor=0.1,fill_mode='nearest'),
    tf.keras.layers.RandomBrightness(0.2),
    tf.keras.layers.Rescaling(1./255),
]
dataaug_train = tf.keras.models.Sequential(augmentation_layers, name='dataaug')

In [None]:
# Validation-time pipeline: no random augmentation, only the same 1/255
# rescaling that ends the training pipeline.
# Uses the stable `layers.Rescaling` (as the training pipeline does) instead of
# the deprecated `layers.experimental.preprocessing` namespace.
dataaug_val = keras.Sequential(
    [
        layers.Rescaling(scale=1./255),
    ]
)

In [None]:
# Apply the random augmentation pipeline to the training split.
augmented_train = train_ds.map(lambda x, y: (dataaug_train(x, training=True), y))
# Rescale the *validation* split; the previous version mapped over train_ds,
# so "augmented_val" contained training images. No random layers are involved,
# hence training=False.
# NOTE(review): this expects `val_ds` to still be the raw validation split;
# run this cell before the preprocessing cell rebinds `val_ds`.
augmented_val = val_ds.map(lambda x, y: (dataaug_val(x, training=False), y))

In [None]:
# Preview up to six augmented training images from a single batch.
# take(1) avoids running the whole dataset through the augmentation pipeline
# just to redraw the same six axes, and the min() guard avoids indexing past
# the end of a batch that holds fewer than six images.
fig, ax = plt.subplots(2, 3, figsize=(10,5))
ax = ax.ravel()
for images, labels in augmented_train.take(1):
    for i in range(min(len(ax), int(images.shape[0]))):
        ax[i].imshow(images[i])

In [None]:
# Append the augmented batches after the original training batches.
# NOTE(review): `augmented_train` ends with Rescaling(1./255) while `train_ds`
# is not rescaled, so the two halves appear to be on different pixel scales.
# Confirm this is intended before training on `full_train_ds`.
full_train_ds = train_ds.concatenate(augmented_train)

In [None]:
# Let tf.data choose the prefetch buffer size so input loading can overlap
# with model execution.
AUTOTUNE = tf.data.AUTOTUNE
train_dataset = full_train_ds.prefetch(AUTOTUNE)
validation_dataset = val_ds.prefetch(AUTOTUNE)

Data Processing

In [None]:
# Imported here under an explicit alias: the import cell binds the bare name
# `preprocess_input` first to the InceptionV3 version and later to the ResNet50
# version, so the bare name resolves to ResNet50 preprocessing even though the
# model below is InceptionV3.
from tensorflow.keras.applications.inception_v3 import preprocess_input as inception_preprocess_input

def preprocess(images, labels):
  """Apply InceptionV3 input preprocessing to a batch; labels pass through unchanged."""
  return inception_preprocess_input(images), labels

# NOTE(review): `train_dataset` and `test_ds` are rebound from themselves, so
# re-running this cell applies the preprocessing a second time. Re-create the
# datasets first if the cell has to be run again.
# NOTE(review): the fit cells train on `augmented_train`, which is rescaled by
# 1/255 and does not pass through `preprocess`; confirm that training and
# validation inputs end up on the same scale.
train_dataset = train_dataset.map(preprocess)
val_ds = validation_dataset.map(preprocess)
test_ds = test_ds.map(preprocess)

Inception V3 Modeling

In [None]:
# InceptionV3 backbone with ImageNet weights and without its original
# classification top; a new head is attached to its output in a later cell.
IncV3 = InceptionV3(include_top = False, weights = "imagenet", input_shape = (224,224,3))

In [None]:
# Mark the whole backbone as trainable; setting the flag on the model
# propagates it to every layer it contains.
IncV3.trainable = True

In [None]:
# List every backbone layer with its index and trainable flag.
for position, backbone_layer in enumerate(IncV3.layers):
    print(f"Layer {position}: name = {backbone_layer.name} , trainable = {backbone_layer.trainable}")

In [None]:
# Print the backbone architecture (layers, output shapes, parameter counts).
IncV3.summary()

In [None]:
model_output = IncV3.output

# Classification head: flatten the backbone feature map, then three
# Dense -> Dropout -> BatchNormalization stages of decreasing width.
x = tf.keras.layers.Flatten()(model_output)
for units in (2048, 1024, 512):
    x = tf.keras.layers.Dense(units, activation="relu")(x)
    x = tf.keras.layers.Dropout(0.5)(x)
    x = tf.keras.layers.BatchNormalization()(x)

# Four-way softmax output, one unit per animal class.
new_outputs = tf.keras.layers.Dense(4, activation="softmax")(x)

# Join backbone and head into the final model.
model = tf.keras.models.Model(inputs=IncV3.inputs, outputs=new_outputs)
model.summary()

In [None]:
# Callbacks
# Checkpoint file name pattern; {epoch} and {val_loss} are filled in by Keras.
checkpoint_filepath = "inception_callback/bestmodel_epoch{epoch:02d}_valloss{val_loss:.2f}.weights.h5"

# Cut the learning rate by 5x when val_loss stops improving. A loss improves by
# decreasing, so the mode must be 'min' (it was 'max', which treats a rising
# loss as progress).
lr_reduce = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience = 3, verbose = 2, mode = 'min')
# NOTE(review): early_stop is created here but is not passed to the fit cells below.
early_stop = EarlyStopping(monitor = 'val_loss', min_delta = 0.1, patience = 1, mode = 'min')
# Save the full model whenever val_accuracy reaches a new best.
# The keyword is `save_weights_only`; the former spelling `save_weight_only`
# is not a ModelCheckpoint argument.
checkpoint = ModelCheckpoint(checkpoint_filepath, monitor = 'val_accuracy', mode = 'max', save_best_only = True, save_weights_only = False, verbose = 1)

Compiling the Model

In [None]:
# Adam with a small learning rate for training the pretrained backbone.
adam = tf.keras.optimizers.Adam(learning_rate = 0.0001)

# Pass the optimizer object itself: the string "adam" builds a fresh Adam with
# the default learning rate and ignores the instance configured above.
model.compile(optimizer = adam, loss = "sparse_categorical_crossentropy", metrics = ["accuracy"])

In [None]:
# Print the full architecture (backbone plus new head) after compilation.
model.summary()

Fitting the Model

In [None]:
# Both datasets are repeated indefinitely, so the length of an epoch is set
# explicitly: 2000 samples' worth of batches for training and for validation.
# NOTE(review): `early_stop` is defined with the other callbacks but is not used here.
steps = 2000 // batch_size
history = model.fit(
    augmented_train.repeat(),
    epochs=30,
    steps_per_epoch=steps,
    validation_data=val_ds.repeat(),
    validation_steps=steps,
    callbacks=[lr_reduce, checkpoint],
    verbose=2,
)

Evaluating the Model

In [None]:
# Training vs. validation curves, one panel per metric.
fig, ax = plt.subplots(1, 2, figsize=(10, 3))
ax = ax.ravel()

for panel, met in zip(ax, ('accuracy', 'loss')):
    panel.plot(history.history[met])
    panel.plot(history.history['val_' + met])
    panel.set_title(f'Model {met}')
    panel.set_xlabel('epochs')
    panel.set_ylabel(met)
    panel.legend(['train', 'val'])

In [None]:
# Evaluate the trained model on the test dataset and print the metric names
# next to the values returned by evaluate().
test_result = model.evaluate(test_ds)
print(f'{model.metrics_names}: {test_result}')

START NO.6 HERE

In [None]:
# Replace the 'BEST_MODEL_NAME' placeholder below with the path of the saved
# checkpoint to reload before running this cell.

model = load_model('BEST_MODEL_NAME')

In [None]:
# Index range of the layers to freeze: model.layers[start_tune:stop_tune],
# i.e. indices 1 through 299 (the stop index is exclusive).
start_tune = 1
stop_tune = 300

# Freeze every layer inside that range. Layers outside the slice keep their
# current trainable setting and are the ones updated by further training.
for layer in model.layers[start_tune:stop_tune]:
  layer.trainable = False

# Disabled alternative, kept for reference: freeze only BatchNormalization layers.
# NOTE(review): `base_model` is not defined in the visible cells.
#for layer in base_model.layers:
#  if layer.__class__.__name__ in ["BatchNormalization"]:
#    layer.trainable = False

In [None]:
# Build a standalone Dense layer with explicit kernel and bias initializers.
# NOTE(review): the layer is only bound to the name `layer`; nothing in the
# visible cells attaches it to `model`, so it does not appear to affect the
# loaded model's weights. Confirm whether this cell is still needed.

from tensorflow.keras import layers
from tensorflow.keras import initializers

layer = layers.Dense(
    units=64,
    kernel_initializer=initializers.RandomNormal(stddev=0.01),
    bias_initializer=initializers.Zeros()
)

In [None]:
# Adam with a small learning rate for fine-tuning the reloaded model.
adam = tf.keras.optimizers.Adam(learning_rate = 0.0001)

# Pass the optimizer object itself: the string "adam" builds a fresh Adam with
# the default learning rate and ignores the instance configured above.
# Recompiling is also what makes the changed `trainable` flags take effect.
model.compile(optimizer = adam, loss = "sparse_categorical_crossentropy", metrics = ["accuracy"])

In [None]:
# Fine-tuning run with the same schedule as the first fit: repeated datasets,
# with an epoch defined as 2000 samples' worth of batches.
# The `lr_reduce` and `checkpoint` callback objects created earlier are reused.
steps = 2000 // batch_size
history = model.fit(
    augmented_train.repeat(),
    epochs=30,
    steps_per_epoch=steps,
    validation_data=val_ds.repeat(),
    validation_steps=steps,
    callbacks=[lr_reduce, checkpoint],
    verbose=2,
)

In [None]:
# Training vs. validation curves of the fine-tuning run, one panel per metric.
fig, ax = plt.subplots(1, 2, figsize=(10, 3))
ax = ax.ravel()

for panel, met in zip(ax, ('accuracy', 'loss')):
    panel.plot(history.history[met])
    panel.plot(history.history['val_' + met])
    panel.set_title(f'Model {met}')
    panel.set_xlabel('epochs')
    panel.set_ylabel(met)
    panel.legend(['train', 'val'])

In [None]:
# Evaluate the fine-tuned model on the test dataset and print the metric names
# next to the values returned by evaluate().
test_result = model.evaluate(test_ds)
print(f'{model.metrics_names}: {test_result}')