===================<br>
**First Section**  
1. connect to storage - this section is the primary one to be changed if not using google colab
2. define all the data paths
3. download the 25k image dataset, and extract it to the local machine (not remote storage)


In [10]:
# @title
# 1
# connect to colab
#configure all paths / folders to use for rest of notebook
# download and extract dataset

gdrive_path='/content/drive'
import os,zipfile
#import shutils
from google.colab import drive
drive.mount(gdrive_path)
dataset_source_address='https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip'
dataset_filename='kagglecatsanddogs_5340.zip'
dataset_gdrive_zip_folder=os.path.join(gdrive_path,'MyDrive','dataset_zips')
dataset_gdrive_zip_file=os.path.join(dataset_gdrive_zip_folder,dataset_filename)
dataset_parentdir='/tmp/data/'
temp_complete_dataset_dir=os.path.join(dataset_parentdir,'PetImages')
dataset_dir=os.path.join(dataset_parentdir,'splitdata')
dataset_temp_zip=os.path.join('/tmp',dataset_filename)
output_dir=os.path.join(gdrive_path,'MyDrive','output')

if not os.path.isdir(dataset_parentdir):
    print(f'creating {dataset_parentdir}')
    os.mkdir(dataset_parentdir)
if not os.path.isdir(output_dir):
    print(f'creating {output_dir}')
    os.mkdir(output_dir)

if not os.path.isdir(dataset_gdrive_zip_folder):
    print(f'creating {dataset_gdrive_zip_folder}')
    os.mkdir(dataset_gdrive_zip_folder)

if not os.path.isfile(dataset_gdrive_zip_file):
  print(f'wget -P {dataset_gdrive_zip_folder} {dataset_source_address} ')
  !wget -P {dataset_gdrive_zip_folder} {dataset_source_address}

if not os.path.isdir(temp_complete_dataset_dir):
  with zipfile.ZipFile(dataset_gdrive_zip_file, 'r') as zip_ref:
    zip_ref.extractall(dataset_parentdir)



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
creating /content/drive/MyDrive/output


===================<br>
**Second section**.
- This is used to clean the dataset. Process for this was taken from the Keras examples at  https://keras.io/examples/vision/image_classification_from_scratch/

In [4]:
# @title
# 2
# Clean dataset
#
# Deletes every file under the Cat/ and Dog/ folders whose first bytes do not
# contain the "JFIF" marker, and reports how many files were removed.

#from here https://keras.io/examples/vision/image_classification_from_scratch/
import os
import tensorflow as tf

print("cleaning dataset (removing files that don't have the required header)")

num_skipped = 0
for folder_name in ("Cat", "Dog"):
    folder_path = os.path.join(temp_complete_dataset_dir, folder_name)
    for fname in os.listdir(folder_path):
        fpath = os.path.join(folder_path, fname)
        # A context manager closes the handle on every path. The previous
        # try/finally called fobj.close() even when open() itself had failed,
        # which either raised NameError or closed the previous file's handle.
        with open(fpath, "rb") as fobj:
            is_jfif = tf.compat.as_bytes("JFIF") in fobj.peek(10)

        if not is_jfif:
            num_skipped += 1
            # Delete corrupted image
            os.remove(fpath)

print("Deleted %d images" % num_skipped)


cleaning dataset (removing files that don't have the required header)
Deleted 1590 images


===================<br>
**Third section**
- Splits the dataset into a classic train/validation/test split.
- This cannot be done using the tensorflow commands as that causes the memory to be exhausted for google colab

In [5]:
# @title
# 3
#Split cleaned dataset - doing this at the folder level to avoid OOM errors on Colab

!pip install split-folders

import splitfolders

if not os.path.isdir(dataset_dir):
  print(f'creating {dataset_dir}')
  os.mkdir(dataset_dir)

  print('splitting dataset into train/validation/test')
  splitfolders.ratio(input=temp_complete_dataset_dir, output=dataset_dir, ratio=(.8, 0.1,0.1))

print('dataset parentdir folder:')
print(os.listdir(dataset_parentdir))

print('dataset folder:')
print(os.listdir(dataset_dir))

train_dataset_dir=os.path.join(dataset_dir,'train')
val_dataset_dir=os.path.join(dataset_dir,'val')
test_dataset_dir=os.path.join(dataset_dir,'test')

Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1
creating /tmp/data/splitdata
splitting dataset into train/validation/test


Copying files: 23412 files [00:05, 4107.57 files/s]

dataset parentdir folder:
['splitdata', 'readme[1].txt', 'PetImages', 'CDLA-Permissive-2.0.pdf']
dataset folder:
['train', 'val', 'test']





===================<br>
**Fourth section**
1. check the number of classes in the dataset (at this point the folder structure should be `dataset_dir/train/class1/image1.jpg`)
2. based on the number of classes, determine if we are doing binary classification or categorical classification and set variables appropriately for later use.
3. configure data augmentation to use (randomly change images)
4. Build the model to use.

Parameters for this section are:
1. the Network architecture to use.
2. Whether to use pre-trained weights (weights='imagenet') or random weights (weights=None)

In [19]:
# @title
# 4
# check dataset for number of classes & set variables
# load model
#
# Outline of this cell:
#   1. count the class folders to choose binary vs. categorical classification
#   2. look up the requested architecture in tf.keras.applications
#   3. build the base model (no top, average pooling)
#   4. assemble augmentation -> preprocessing -> base -> dropout -> prediction
#   5. compile the model
import tensorflow as tf
print(f'TensorFlow version: {tf.__version__}')

architecture='xception'
weights='imagenet'
# set weights to 'imagenet' or None

# One sub-folder per class is expected inside each split folder; take the
# largest count in case a split is missing a class folder.
n_classes=max(len(os.listdir(train_dataset_dir)),len(os.listdir(val_dataset_dir)),len(os.listdir(test_dataset_dir)))
if(n_classes>2):
    label_mode='categorical'
    loss_function=tf.keras.losses.CategoricalCrossentropy
    prediction_layer=tf.keras.layers.Dense(n_classes,activation='softmax')
else:
    label_mode='binary'
    loss_function=tf.keras.losses.BinaryCrossentropy
    prediction_layer=tf.keras.layers.Dense(1,activation='sigmoid')

#Clear any already loaded data
tf.keras.backend.clear_session()

tfKerasApps=tf.keras.applications
base_arch_func=None
base_model=None
# get model name and preprocessing functions for the provided architecture
# e.g. tf.keras.applications.vgg16
try:
    base_arch_func=getattr(tfKerasApps, architecture)
    # dir() is sorted, so index 0 is the alphabetically first attribute of the
    # module; for 'xception' that is the 'Xception' constructor.
    # NOTE(review): for modules that export several model constructors this
    # picks the alphabetically first one - confirm it is the one intended.
    archDisplayName=dir(base_arch_func)[0]
    base_model_func=getattr(base_arch_func, archDisplayName)
    preprocess_input=getattr(base_arch_func, 'preprocess_input')
except AttributeError:
    print(f'function not found - was architecture correctly provided?')
    # Stop here: continuing would only fail further down with a confusing
    # NameError (or silently reuse a function left over from an earlier run).
    raise

tf.keras.backend.clear_session()

#Get model shapes by loading a temporary version of the model
temp_model=base_model_func(weights=None)
# drop the leading batch dimension
preferred_input_shape=temp_model.get_build_config().get('input_shape')[1:]
# first two entries of the input shape are used as the image size
preferred_image_size=preferred_input_shape[:2]
temp_model=None

tf.keras.backend.clear_session()

print(f'building {archDisplayName} model')

#print(f'loading {archDisplayName} model with {weights} weights')
base_model=base_model_func(
    include_top=False,
    weights=weights,
    input_shape=preferred_input_shape,
    pooling='avg')

# Pre-trained weights are frozen; random weights stay trainable.
if(weights == 'imagenet'):
  base_model.trainable = False

# randomize things a bit more with the training images:
augments=[]
augments_name=''

augments_name=augments_name+'Flip'
augments = augments + [tf.keras.layers.RandomFlip('horizontal')]
augments_name=augments_name+'Rotate'
# RandomRotation's factor is a fraction of a full turn (2*pi), not degrees.
# The previous value of 30 therefore allowed up to +/-30 full rotations;
# 30/360 limits the rotation to +/-30 degrees.
augments = augments + [tf.keras.layers.RandomRotation(30/360)]
augments_name=augments_name+'Zoom'
augments = augments + [tf.keras.layers.RandomZoom(0.2)]
augments_name='Random'+augments_name
data_augmentation = tf.keras.Sequential(augments,name=augments_name)


#print (f'n_classes: {n_classes}')
inputs = tf.keras.Input(shape=preferred_input_shape)
x = data_augmentation(inputs)
x = preprocess_input(x)
# NOTE(review): training=False is passed to the base model even when
# weights is None and the base is trainable - confirm that is intended.
x = base_model(x, training=False)
x = tf.keras.layers.Dropout(0.5)(x)
outputs = prediction_layer(x)
model = tf.keras.Model(inputs, outputs)

# The prediction layer already applies sigmoid/softmax, hence from_logits=False.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                    loss=loss_function(from_logits=False),
                    metrics=['accuracy'])

TensorFlow version: 2.13.0
building Xception model


===================<br>
**Fifth Section**
 - Load datasets

In [7]:
# @title
# Load the train / validation / test splits as tf.data datasets, with labels
# inferred from the sub-folder names and images resized to the model's size.

def _load_split(split_label, split_dir):
    """Announce and load one split folder as an image dataset."""
    print(f'loading {split_label} dataset')
    return tf.keras.utils.image_dataset_from_directory(
        split_dir,
        labels='inferred',
        label_mode=label_mode,
        image_size=preferred_image_size)

train_ds=_load_split('training', train_dataset_dir)
validation_ds=_load_split('validation', val_dataset_dir)
test_ds=_load_split('test', test_dataset_dir)



loading training dataset
Found 18728 files belonging to 2 classes.
loading validation dataset
Found 2340 files belonging to 2 classes.
loading test dataset
Found 2342 files belonging to 2 classes.


===================<br>
**Sixth Section** ***(Optional)***
- Get the accuracy of the model against the test dataset prior to any training - use for comparison purposes

In [18]:
# @title
#Evaluate accuracy without further training.

#evaluate takes ~15 min on colab when using CPU or ~ 30s when using GPU (once data is loaded)
# model.evaluate returns the loss first, followed by the compiled metrics
# (here: accuracy). The two names were previously unpacked the wrong way round.
(initial_loss,initialAccuracy)=model.evaluate(test_ds)



===================<br>
**Seventh Section**
1. Build callbacks
  - Early Stopping - if validation accuracy isn't improving
  - ReduceLROnPlateau - if training accuracy isn't improving (this triggers sooner than the early stopping one)
  - Checkpoint - save a copy of the model when the best results are obtained (useful if google colab session times out). The methods to load this are beyond the scope of this notebook.

2. setup dataset autotuning. In theory this speeds up training by reducing idle time on GPU - by ensuring fresh data is available for processing all the time.

3. start the training. This takes a while - especially if training from random weights.
Note the number of trainable parameters in use - this changes depending on whether random weights are used or imagenet weights are used.

In [None]:
# @title
# Setup callbacks
#then try & train the model.
#
# Produces: history (Keras History), acc / val_acc / loss / val_loss (per-epoch
# lists used by the plotting section), train_id (used in output file names),
# and the test-set accuracy in `accuracy`.

import time

# NOTE(review): base_learning_rate is not applied anywhere in this notebook;
# the optimizer's learning rate is set when the model is compiled.
base_learning_rate=0.02
#Early Stopping Patience (epoch Count)
es_patience=5
#Learning Rate Patience (epoch Count)
lr_patience=3
initial_epoch=0
epochs=30

# Timestamp-based identifier used to tell the outputs of separate runs apart.
train_id = str(int(time.time()))

callbacks=[]
# NOTE(review): restore_best_weights is not set here, so after an early stop
# the in-memory model may not be the best-scoring one - confirm this is wanted.
callbacks = callbacks + [tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', min_delta=0.001, patience=es_patience)]
callbacks=callbacks +[tf.keras.callbacks.ReduceLROnPlateau(monitor='accuracy', factor=0.2, min_delta=0.005,
                      patience=lr_patience, min_lr=0.000001)]
checkpoint_dir=os.path.join(output_dir,'checkpoints')
if not os.path.isdir(checkpoint_dir):
    os.mkdir(checkpoint_dir)

# Save the model each time val_accuracy improves.
callbacks=callbacks +[tf.keras.callbacks.ModelCheckpoint(
    checkpoint_dir,
    monitor='val_accuracy',
    verbose=0,
    save_best_only=True,
    mode='auto',
    save_freq='epoch')]


model.summary(show_trainable=True)
# set up autotuning to try & speed things up:
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
validation_ds = validation_ds.prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)
print("training")

# Report the learning rate the optimizer is actually using; the message used
# to show base_learning_rate, which the model was never compiled with.
current_learning_rate = float(model.optimizer.learning_rate.numpy())
print(f"es_patience={es_patience}")
print(f"lr_patience={lr_patience} (current: {current_learning_rate:g})")

history = model.fit(train_ds,
    initial_epoch=initial_epoch,
    epochs=(initial_epoch+epochs),
    validation_data=validation_ds,
    callbacks=callbacks,
    validation_freq=1)
initial_epoch=history.epoch[-1]+1

# Per-epoch curves for the plotting section. These were previously assigned
# inside a condition that always held for a run starting at epoch 0.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Keep the test loss in its own name: unpacking into `loss` replaced the
# training-loss history above with a single number and broke the loss plot.
test_loss, accuracy = model.evaluate(test_ds)
print(f"{architecture} - Test accuracy :", accuracy)


Model: "model"
____________________________________________________________________________
 Layer (type)                Output Shape              Param #   Trainable  
 input_2 (InputLayer)        [(None, 299, 299, 3)]     0         Y          
                                                                            
 RandomFlipRotateZoom (Sequ  (None, 299, 299, 3)       0         Y          
 ential)                                                                    
                                                                            
 tf.math.truediv (TFOpLambd  (None, 299, 299, 3)       0         Y          
 a)                                                                         
                                                                            
 tf.math.subtract (TFOpLamb  (None, 299, 299, 3)       0         Y          
 da)                                                                        
                                                             

===================<br>
**Eighth section**
- Save the model

In [None]:
# @title
# Write the trained model to <output_dir>/saved_models/<architecture>_<train_id>.
model_path=os.path.join(output_dir,'saved_models')

# Make sure the parent folder for saved models exists.
saved_models_dir_exists = os.path.isdir(model_path)
if not saved_models_dir_exists:
    os.mkdir(model_path)

# One target per training run, distinguished by train_id.
save_path = os.path.join(model_path,f'{architecture}_{train_id}')
print(f'saving {architecture} model to file "{save_path}"')
model.save(save_path)



===================<br>
**Ninth Section**
- Plot training results.

In [None]:
# @title
# Draw the model diagram, then plot training/validation accuracy and loss per
# epoch and save the figure to the output folder.

# matplotlib was used below without ever being imported in this notebook.
import matplotlib.pyplot as plt

plot_model_filepath=os.path.join(output_dir,f'{architecture}_model.png')
tf.keras.utils.plot_model(model,to_file=plot_model_filepath)

# Read the curves straight from the History object so the plot does not depend
# on loose variables that other cells may have reassigned.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
train_loss_curve = history.history['loss']
val_loss = history.history['val_loss']

fig, (ax1, ax2) = plt.subplots(2, 1)
ax1.plot(acc, label='Training Accuracy')
ax1.plot(val_acc, label='Validation Accuracy')
ax1.set_ylim([0.5, 1])
#if weights == 'imagenet':
#    ax1.plot([epochs-1,epochs-1],
#        ax1.get_ylim(), label='Start Fine Tuning')
ax1.legend(loc='lower right')
ax1.set_title(f'{architecture} Training and Validation Accuracy ')

ax2.plot(train_loss_curve, label='Training Loss')
ax2.plot(val_loss, label='Validation Loss')
ax2.set_ylim([0, 1.0])
#if weights == 'imagenet':
#    ax2.plot([epochs-1,epochs-1],
#        ax2.get_ylim(), label='Start Fine Tuning')
ax2.legend(loc='upper right')
ax2.set_title('Training and Validation Loss')
ax2.set_xlabel('epoch')
fig.tight_layout()
# NOTE(review): {epochs} appears twice in the file name - confirm whether the
# second one was meant to be a different value.
fig_filename=os.path.join(f'{output_dir}',
        f'{architecture}-{epochs}-{epochs}-{weights}-{train_id}')
fig.savefig(fname=fig_filename)
