In [1]:
# This will install dependencies and display them in the notebook
#!pip install -r requirements.txt

# Install dependencies in Quiet Mode
#!pip install -r -q requirements.txt

In [2]:
import os
import pathlib
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import nvitop
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.optimizers import RMSprop

# os.environ["TF_GPU_THREAD_MODE"] = "gpu_private" can slow down computation depending on setup and GPU type (developed on GTX 1660)
tf.distribute.OneDeviceStrategy(device="/gpu:0")

<tensorflow.python.distribute.one_device_strategy.OneDeviceStrategy at 0x206a6eac910>

In [3]:
policy = tf.keras.mixed_precision.Policy("mixed_float16")
tf.keras.mixed_precision.experimental.set_policy(policy)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce GTX 1660, compute capability 7.5


In [4]:
cwd = pathlib.Path.cwd()

datadir = pathlib.Path(
    cwd / 'cats-v-dogs' /'training'
)
valdir = pathlib.Path(
    cwd / 'cats-v-dogs' / 'testing'
)

AUTOTUNE = tf.data.AUTOTUNE

logdir = pathlib.Path.cwd() / "logs"

if logdir.exists():
    !rmdir /q/s logs

In [5]:
image_count = len(list(datadir.glob("*/*.jpg")))
print(image_count)
batch_size = 128
img_height = 150
img_width = 150

22283


In [6]:
list_ds = tf.data.Dataset.list_files(str(datadir / "*/*.jpg"), shuffle=False)
val_ds = tf.data.Dataset.list_files(str(valdir / "*/*.jpg"), shuffle=False)
list_ds = list_ds.shuffle(image_count, reshuffle_each_iteration=False)
num_train_files = len(list_ds)
num_val_files = len(val_ds)

In [7]:
for f in list_ds.take(5):
    print(f.numpy())
print("validation")
for f in val_ds.take(5):
    print(f.numpy())

b'C:\\Users\\josephdavis\\Desktop\\Tensorflow notebooks\\cats-v-dogs\\training\\dogs\\3294.jpg'
b'C:\\Users\\josephdavis\\Desktop\\Tensorflow notebooks\\cats-v-dogs\\training\\cats\\534.jpg'
b'C:\\Users\\josephdavis\\Desktop\\Tensorflow notebooks\\cats-v-dogs\\training\\cats\\2621.jpg'
b'C:\\Users\\josephdavis\\Desktop\\Tensorflow notebooks\\cats-v-dogs\\training\\dogs\\3661.jpg'
b'C:\\Users\\josephdavis\\Desktop\\Tensorflow notebooks\\cats-v-dogs\\training\\dogs\\12031.jpg'
validation
b'C:\\Users\\josephdavis\\Desktop\\Tensorflow notebooks\\cats-v-dogs\\testing\\cats\\100.jpg'
b'C:\\Users\\josephdavis\\Desktop\\Tensorflow notebooks\\cats-v-dogs\\testing\\cats\\10004.jpg'
b'C:\\Users\\josephdavis\\Desktop\\Tensorflow notebooks\\cats-v-dogs\\testing\\cats\\10024.jpg'
b'C:\\Users\\josephdavis\\Desktop\\Tensorflow notebooks\\cats-v-dogs\\testing\\cats\\10038.jpg'
b'C:\\Users\\josephdavis\\Desktop\\Tensorflow notebooks\\cats-v-dogs\\testing\\cats\\10052.jpg'


In [8]:
class_names = np.array(sorted([item.name for item in datadir.glob("*")]))
print(class_names)

['cats' 'dogs']


In [9]:
print(tf.data.experimental.cardinality(list_ds).numpy())
print(tf.data.experimental.cardinality(val_ds).numpy())

22283
2473


In [10]:
#Create Process path functions for creating TF Data Pipeline

def get_label(file_path):
    parts = tf.strings.split(file_path, os.path.sep)
    one_hot = parts[-2] == class_names
    return tf.argmax(one_hot)

def decode_img(img):
    img = tf.image.decode_jpeg(img, channels=3)
    return img

def process_path(file_path):
    label = get_label(file_path)
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    return img, label

In [11]:
#TF.Data API using map, where the interleave seems to cause input data pipeline slowdown (I think it's my development CPU bottlenecking the multi-thread interleave process)

train_ds = list_ds.map(process_path, num_parallel_calls=AUTOTUNE)
val_ds = val_ds.map(process_path, num_parallel_calls=AUTOTUNE)

# train_ds = list_ds.interleave(
#     lambda x: tf.data.Dataset.list_files(str(datadir / "*/*.jpg"), shuffle=True),
#     num_parallel_calls=AUTOTUNE,
#     cycle_length=4,
# #     block_length=4,
# #     deterministic=False,
# ).map(process_path, num_parallel_calls=AUTOTUNE)
# val_ds = val_ds.interleave(
#     lambda x: tf.data.Dataset.list_files(str(valdir / "*/*.jpg"), shuffle=True),
#     num_parallel_calls=AUTOTUNE,
#     cycle_length=4,
# #     block_length=4,
# #     deterministic=False,
# ).map(process_path, num_parallel_calls=AUTOTUNE)

# train_ds.cache()
# val_ds.cache()

In [12]:
#augmentation and resize/rescale preprocess layers
data_augmentation = tf.keras.Sequential(
    [
        #         layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
        layers.experimental.preprocessing.RandomFlip("horizontal"),
        layers.experimental.preprocessing.RandomRotation(0.2),
        layers.experimental.preprocessing.RandomWidth(0.01, interpolation="bilinear"),
        layers.experimental.preprocessing.RandomHeight(0.01, interpolation="bilinear"),
#         layers.experimental.preprocessing.RandomContrast(0.2), re-enable these if over-fitting on data
#         layers.experimental.preprocessing.RandomZoom(0.1),
    ]
)

resize_and_rescale = tf.keras.Sequential(
    [
        layers.experimental.preprocessing.Resizing(img_height, img_width),
        layers.experimental.preprocessing.Rescaling(1.0 / 255),
    ]
)

In [13]:
def prepare(ds, shuffle=False, augment=False):

    # Resize and rescale all datasets.
    ds = ds.map(lambda x, y: (resize_and_rescale(x), y), num_parallel_calls=AUTOTUNE)

    # cache all datasets after resize/rescale
    ds.cache()

    # shuffle only Training DS
    if shuffle:
        ds = ds.shuffle(1000)

    # Batch all datasets
    ds = ds.batch(batch_size)

    # augment only training dataset, call cache after augmentation dramatically increases input speed
    if augment:
        ds = ds.map(
            lambda x, y: (data_augmentation(x, training=True), y),
            num_parallel_calls=AUTOTUNE,
        ).cache()

    # Use buffered prefecting on all datasets
    return ds.prefetch(buffer_size=AUTOTUNE)


train_ds = prepare(train_ds, shuffle=True, augment=True)
val_ds = prepare(val_ds)

#experimental options for increasing input data pipeline speed further 
# options = tf.data.Options()
# options.experimental_threading.max_intra_op_parallelism = 1
# train_ds = train_ds.with_options(options)
# val_ds = val_ds.with_options(options)

In [14]:
model = tf.keras.models.Sequential(
    [
        # Note the input shape is the desired size of the image 150x150 with 3 bytes color
        tf.keras.layers.Conv2D(
            16, (3, 3), activation="relu", input_shape=(150, 150, 3)
        ),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Conv2D(32, (3, 3), activation="relu"),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Conv2D(64, (3, 3), activation="relu"),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Conv2D(128, (3, 3), activation="relu"),
        tf.keras.layers.MaxPooling2D(2, 2),
        # Flatten the results to feed into a DNN
        tf.keras.layers.Flatten(),
#         tf.keras.layers.Dropout(.2), if need due to overfit
        # 512 neuron hidden layer
        tf.keras.layers.Dense(512, activation="relu"),
        tf.keras.layers.Dense(64, activation="relu"),
        # Only 1 output neuron. It will contain a value from 0-1 where 0 for 1 class ('cats') and 1 for the other ('dogs')
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ]
)

# resnet_model = tf.keras.applications.ResNet50(
# include_top=False, weights='imagenet',input_shape=(224,224,3), pooling ='avg')
# for layer in resnet_model.layers:
#     layer.trainable = False
# resnet_model.add(tf.keras.layers.Flatten())
# output_layer = tf.keras.layers.Dense(1, activation="sigmoid")
# # resnet_model.add(output_layer_)
# model = tf.keras.models.Model(inputs=resnet_model.input, outputs = output_layer)

model.compile(
    optimizer=RMSprop(lr=0.001), loss="binary_crossentropy", metrics=["accuracy"]
)

# model.summary()

In [15]:
#Create LogDir for Tensorboard writing
logs = "logs/" + datetime.now().strftime("%Y%m%d-%H%M%S")

tboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=logs, histogram_freq=1, profile_batch="500,520"
)

#early stopping callback for when val_loss drops more than 3 epochs in a row, restores best weights from model training
callback = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", mode="auto", patience=3, restore_best_weights=True
)

In [16]:
history = model.fit(
    train_ds,
    epochs=50,
    verbose=1,
    validation_data=val_ds,
    callbacks=[callback, tboard_callback],
    # enable steps per epoch and val steps if using tf.data.interleave(map_func, num_parallel_calls).map(process_path)
    #     steps_per_epoch=int(num_train_files / batch_size),
    #     validation_steps=int(num_val_files / batch_size),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50


In [17]:
#notebook magic for t-board extension loading and calling in-line tboard from logdir

%load_ext tensorboard
%tensorboard --logdir=logs

Reusing TensorBoard on port 6006 (pid 58252), started 7 days, 5:46:20 ago. (Use '!kill 58252' to kill it.)

In [19]:
#uncomment for evaluation on validation dataset
model.evaluate(val_ds)



[0.5104008316993713, 0.7634451985359192]

In [20]:
import numpy as np

# from google.colab import files
from keras.preprocessing import image

# uploaded=files.upload()

# for fn in uploaded.keys():

# predicting images
#   path='/content/' + fn
path = "C:/Users/josephdavis/Desktop/Tensorflow notebooks/cat-dog-val/"
for fn in os.listdir(path):
    img = image.load_img(path + fn, target_size=(150, 150))

    x = image.img_to_array(img)
    x = x/255.
    x = np.expand_dims(x, axis=0)
    images = np.vstack([x])

    classes = model.predict(images, batch_size=10)
    

    if classes[0] > 0:
        print(classes[0])
        print(fn + " is a dog")

    else:
        print(fn + " is a cat")

[0.4387]
127.jpg is a dog
[0.6865]
137.jpg is a dog
[0.7637]
140.jpg is a dog
[0.446]
159.jpg is a dog
[0.998]
196.jpg is a dog
[0.1104]
205.jpg is a dog
[0.6865]
218.jpg is a dog
[0.982]
220.jpg is a dog
[0.7666]
222.jpg is a dog
[0.9688]
229.jpg is a dog
[0.524]
232.jpg is a dog
[0.9863]
243.jpg is a dog
[0.146]
274.jpg is a dog
[0.8784]
279.jpg is a dog
[0.648]
285.jpg is a dog
[0.999]
321.jpg is a dog
[0.417]
326.jpg is a dog
[0.2354]
330.jpg is a dog
[0.982]
338.jpg is a dog
[0.0642]
345.jpg is a dog
[0.87]
346.jpg is a dog
[0.905]
353.jpg is a dog
[0.2126]
363.jpg is a dog
[0.634]
365.jpg is a dog
[0.907]
402.jpg is a dog
[0.909]
407.jpg is a dog
[0.768]
409.jpg is a dog
[0.9395]
425.jpg is a dog
[0.6294]
427.jpg is a dog
[0.998]
437.jpg is a dog
[0.7896]
455.jpg is a dog
[0.8354]
463.jpg is a dog
[0.3833]
471.jpg is a dog
[0.4922]
483.jpg is a dog
[0.984]
487.jpg is a dog
[0.6143]
517.jpg is a dog
[0.804]
519.jpg is a dog
[0.2]
533.jpg is a dog
[0.9854]
546.jpg is a dog
[0.8853]