In [7]:
import os, warnings
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec #this allows advance layout grids for creating coplex visulaizations
import pandas as pd
%matplotlib inline
import tensorflow as tf #a deep learning library to build and train models
from tensorflow.keras.preprocessing import image_dataset_from_directory #a tf utility used to load imgae datasets directly from a directory structure

Load the Data

In [5]:
#reproducibility: this function sets the seed for each operation so that they produce the same results regardless of how many times the lines are run
def set_seed(seed=123):
    np.random.seed(seed)
    tf.random.set_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['TF_DETERMINISTC_OPS'] = '1'

set_seed(123)

In [8]:
#set natplotlib default parameters
plt.rc('figure', autolayout=True) #customizes the style of the plot
plt.rc('axes', labelweight='bold', labelsize='large', 
       titleweight='bold', titlesize=18, titlepad=10)
plt.rc('image', cmap='magma')
warnings.filterwarnings("ignore") #suppresses warnings in the output for cleaner results

In [26]:
#loading the training and the validation sets
ds_train_ = image_dataset_from_directory(
    r"Train",
    labels="inferred", #infer labels from the subfolder names
    label_mode="binary", #create binary labels (0, 1)
    image_size=[224, 224], #resizes all the inages to 128 by 128 pixels
    interpolation='nearest', #use the nearest pixel to determine the values of the new pixel after resizing the images
    batch_size=64, #divides the dataset into abtches of 64 images
    shuffle=True #randomize the order of the dataset
)

ds_test_ = image_dataset_from_directory(
    r"Test",
    labels="inferred",
    label_mode="binary",
    image_size=[224, 224],
    interpolation='nearest',
    batch_size=64,
    shuffle=True
)

Found 23650 files belonging to 2 classes.
Found 3863 files belonging to 2 classes.


In [27]:
#data pipeline
def convert_to_float(image, label):
    """This function ensures that images are converted to float data type 
    (scaled between 0 and 1) for compatitbility with deep learning models"""
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    return image, label

In [28]:
#prepare and optimze the dataset
AUTOTUNE = tf.data.experimental.AUTOTUNE
ds_train = (
    ds_train_.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE) #chaches the dataset for faster loading
     #preloads the dataset to improve training speed
)

ds_test = (
    ds_test_.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE) #chaches the dataset for faster loading
    #preloads the dataset to improve training speed
)

Define Pretrained Base <br>
<p>The most commonly used dataset for pretraining is ImageNet, a large dataset of many kind of natural images. Keras includes a variety models pretrained on ImageNet in its applications module. The pretrained model we'll use is called VGG16</p>

In [21]:
from keras.applications.vgg16 import VGG16

pretrained_base = VGG16()

#freeze the pretrain base
pretrained_base.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
[1m553467096/553467096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 0us/step


Attach the Head

In [29]:
from tensorflow import keras
from tensorflow.keras import layers

#building the model
model = keras.Sequential([
    pretrained_base,
    layers.Flatten(), #converts the multidimesional output of the pretrained base into a 1D vector
    layers.Dense(6, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

Train the model

In [None]:
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics = ['binary_accuracy']
)

history = model.fit(
    ds_train,
    validation_data=ds_test,
    epochs=30,
    verbose=0,
)

When training a neural network, it's always a good idea to examine the loss and metric plots. The `history` object contains this information in a dictionary `history.history`. We can use Pandas to convert this dictionary to a dataframe and plot it with a built-in method.

In [None]:
history_frame = pd.DataFrame(history.history)
history_frame.loc[:, ['loss', 'val_loss']].plot()
history_frame.loc[:, ['binary_accuracy', 'val_binary_accuracy']].plot()