<a href="https://colab.research.google.com/github/allagas/Bat_Data/blob/master/BuzzFinder_Retrain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

TensorFlow 2.x selected.


In [3]:
import itertools
import os
import datetime
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub

print("TF version:", tf.__version__)
print("Hub version:", hub.__version__)
print("GPU is", "available" if tf.config.list_physical_devices('GPU') else "NOT AVAILABLE")

CurrTime = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
print('Training starting at ' + CurrTime)

TF version: 2.1.0
Hub version: 0.7.0
GPU is available
Training starting at 20200224-160357


In [26]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [4]:
module_selection = ("inception_v3", 299)
handle_base, pixels = module_selection
MODULE_HANDLE = "https://tfhub.dev/google/imagenet/{}/feature_vector/4".format(handle_base)
IMAGE_SIZE = (pixels, pixels)
print("Using {} with input size {}".format(MODULE_HANDLE, IMAGE_SIZE))

Using https://tfhub.dev/google/imagenet/inception_v3/feature_vector/4 with input size (299, 299)


In [0]:
BATCH_SIZE = 32
N_TRAIN = int(1e4)
STEPS_PER_EPOCH = N_TRAIN//BATCH_SIZE

In [17]:
! rm -r ./Bat_Data/
! git clone https://github.com/allagas/Bat_Data

Cloning into 'Bat_Data'...
remote: Enumerating objects: 6596, done.[K
remote: Counting objects: 100% (6596/6596), done.[K
remote: Compressing objects: 100% (6595/6595), done.[K
remote: Total 6596 (delta 0), reused 6596 (delta 0), pack-reused 0
Receiving objects: 100% (6596/6596), 1.20 GiB | 31.58 MiB/s, done.
Checking out files: 100% (11916/11916), done.


In [21]:
! ls Bat_Data/

Train


In [0]:
train_dir = "Bat_Data/Train/"
valid_dir = "/Bat_Data/Valid/"

In [0]:
datagen_kwargs = dict(rescale=1./255)
dataflow_kwargs = dict(target_size=IMAGE_SIZE,
                       batch_size=BATCH_SIZE,
                       interpolation="bilinear")

In [24]:
valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    **datagen_kwargs)
valid_generator = valid_datagen.flow_from_directory(
    valid_dir, shuffle=False, **dataflow_kwargs)

do_data_augmentation = True
if do_data_augmentation:
    train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rotation_range=0,
        horizontal_flip=False,
        width_shift_range=0.2,
        #height_shift_range=0.2,
        **datagen_kwargs)
else:
    train_datagen = valid_datagen
train_generator = train_datagen.flow_from_directory(
    train_dir, shuffle=True, **dataflow_kwargs)

do_fine_tuning = True

print("Building model with", MODULE_HANDLE)

Found 11916 images belonging to 2 classes.
Building model with https://tfhub.dev/google/imagenet/inception_v3/feature_vector/4


In [0]:
BuzzFinder = tf.keras.Sequential([
    hub.KerasLayer(MODULE_HANDLE, trainable=do_fine_tuning),
    tf.keras.layers.Dropout(rate=0.2),
    tf.keras.layers.Dense(train_generator.num_classes,
                          activation='softmax',
                          kernel_regularizer=tf.keras.regularizers.l2(0.0001))
])
BuzzFinder.build((None,)+IMAGE_SIZE+(3,))
BuzzFinder.summary()

In [0]:
# Start SGD w/ Learning Rate Decay

lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
  0.001,
  decay_steps=STEPS_PER_EPOCH*1000,
  decay_rate=1,
  staircase=False)

BuzzFinder.compile(
    optimizer=tf.keras.optimizers.SGD(lr_schedule, nesterov=True, momentum=0.9),
    loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=0.2),
    metrics=['accuracy'])

optimizer_type = "_SGD"

# End SGD w/ LRD

In [0]:
log_dir = "/media/philip/Bat_Data/BuzzFinder/perf_log/BuzzFinderModel" + optimizer_type + CurrTime
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [0]:
steps_per_epoch = train_generator.samples // train_generator.batch_size
validation_steps = valid_generator.samples // valid_generator.batch_size
hist = BuzzFinder.fit_generator(
    train_generator,
    epochs=5, steps_per_epoch=steps_per_epoch,
    validation_data=valid_generator,
    validation_steps=validation_steps,
    callbacks=[tensorboard_callback]).history

In [0]:
saved_model_path = "/Bat_Data/BuzzFinder/BuzzFinderModel" + optimizer_type + CurrTime
tf.saved_model.save(BuzzFinder, saved_model_path)

In [0]:
# Confusion Matrix

filenames = valid_generator.filenames
nb_samples = len(filenames)

cm_preds = BuzzFinder.predict(valid_generator, steps=nb_samples)

# Match Prediction to Class Label
preds_cls_idx = cm_preds.argmax(axis= 1)
idx_to_cls = {v: k for k, v in valid_generator.class_indices.items()}
preds_cls = np.vectorize(idx_to_cls.get)(preds_cls_idx)

# Store Predictions in DataFrame
df1 = pd.DataFrame({"filenames": filenames})
df2 = pd.DataFrame({"prediction": preds_cls})
df3 = pd.DataFrame({"Buzz Prob": cm_preds[:,0], "Other Prob": cm_preds[:,1]})
predictions = pd.concat([df1, df2, df3], axis=1)

print('Saving Predictions')

# Save Predictions to CSV
pred_save_dir = "/media/philip/Bat_Data/predictions_" + "Valid_" + CurrTime + ".csv"
predictions.to_csv(pred_save_dir)

con_mat = tf.math.confusion_matrix(labels=valid_generator.classes, predictions=preds_cls_idx)
print(con_mat)