## **Imports**

In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

from google.colab import drive
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pickle
import cv2
import random
import sys
import os


In [2]:
# Mount Google Drive into the Colab runtime; force_remount=True is passed so
# the mount is requested again even if the drive is already mounted.
drive.mount('/content/drive', force_remount=True)
# Register the project folder as the FIRST import-path entry. The "Read Files"
# cell reads this folder back through sys.path[0], so it must stay at index 0.
sys.path.insert(0,'/content/drive/MyDrive/Shark_Classification')

# Project-local modules; these imports rely on the path entry added above.
from architectures.smallvggnet import SmallVGGNet
from keras_callbacks.keras_callbacks import LearningRateScreening

Mounted at /content/drive


## **Function to List Files in Directory**

In [3]:
file_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")
keep_labels     = ['great_white_shark','hammerhead_shark']

def list_files(indir=None, valid_extensions=file_extensions, valid_labels=keep_labels):
    """Yield paths of image files under ``indir`` whose parent folder is a kept label.

    The directory tree is walked recursively. A file is yielded when its name
    ends (case-insensitively) with one of ``valid_extensions`` and the name of
    the folder that directly contains it is listed in ``valid_labels``.

    Args:
        indir: Root directory to scan. ``None`` (the default) means the
            current working directory *at call time*.
        valid_extensions: A single extension string or any iterable of
            extensions (e.g. ``".jpg"``). ``None`` disables the extension check.
        valid_labels: Container of folder names to keep. ``None`` disables
            the label check, so every folder is kept.

    Yields:
        str: Path of each matching file, in sorted (deterministic) order.
    """
    # Resolve the default here, not in the signature: a default of
    # os.getcwd() would be evaluated once, when the function is defined.
    if indir is None:
        indir = os.getcwd()

    # str.endswith only accepts a str or a tuple, so normalise lists/sets.
    if valid_extensions is not None:
        if isinstance(valid_extensions, str):
            valid_extensions = (valid_extensions,)
        valid_extensions = tuple(ext.lower() for ext in valid_extensions)

    for rootdir, dirs, files in os.walk(indir):
        # os.walk lists entries in arbitrary filesystem order; sorting makes
        # the output (and any seeded shuffle applied to it) reproducible.
        dirs.sort()
        for filename in sorted(files):
            # skip anything that is not an image we want to process
            if valid_extensions is not None and not filename.lower().endswith(valid_extensions):
                continue

            image_path = os.path.join(rootdir, filename)

            # the label is the name of the folder directly holding the file
            label = os.path.basename(os.path.dirname(image_path))
            if valid_labels is None or label in valid_labels:
                yield image_path

## **Read Files**

In [4]:
# sys.path[0] is the project folder registered in the drive-mount cell
dataset_path = os.path.join(sys.path[0],'sharks')
output_path= os.path.join(sys.path[0],"output")

# obtain the image paths and randomize their order (seeded, so repeatable)
image_paths = list(list_files(dataset_path))
random.seed(42)
random.shuffle(image_paths)

# initialize data and labels list; max_count caps how many images are loaded
# (-1 never equals the running count, so it means "load everything")
data, labels, count, max_count = [],[],0,-1

print("[INFO] Reading images from disk. This may take a while ... ")
for image_path in image_paths:

    # cv2.imread does not raise on an unreadable or corrupt file, it returns
    # None -- skip such files instead of crashing inside cv2.resize
    image = cv2.imread(image_path)
    if image is None:
        print("[WARNING] could not read image, skipping: {}".format(image_path))
        continue

    # resize to the 64x64 input the network is built for and store it
    data.append(cv2.resize(image, (64, 64)))

    # the label is the name of the folder that contains the image
    labels.append(image_path.split(os.path.sep)[-2])

    count += 1
    if count == max_count:
        break

# print label count for every folder in the dataset (dropped labels show 0)
label_list = os.listdir(dataset_path)
for label_name in label_list:
    print("label: {} counts: {}".format(label_name, labels.count(label_name)))

[INFO] Reading images from disk. This may take a while ... 
label: great_white_shark counts: 928
label: mako counts: 0
label: tiger_shark counts: 0
label: hammerhead_shark counts: 744
label: whale_shark counts: 0


## **Data Preprocessing**

### **Scale Images**

In [5]:
# Turn the Python lists into arrays; pixel values are rescaled from the 8-bit
# range into [0, 1].
# NOTE: `data` is overwritten, so running this cell twice rescales it twice.
labels = np.array(labels)
data = np.divide(np.array(data, dtype=np.float64), 255.0)

### **Train/Test Split**

In [6]:
# Hold out a quarter of the samples for testing and train on the remaining
# three quarters; the fixed random_state keeps the split identical across runs.
trainX, testX, trainY, testY = train_test_split(
    data,
    labels,
    test_size=0.25,
    random_state=42,
)

### **One-Hot Encoding**

In [7]:
# keep a handle on the string labels before trainY is overwritten below
original_classes = trainY

# integer encode: the mapping is learned from the training labels only and
# then re-used unchanged for the test labels
label_encoder = LabelEncoder()
trainY = label_encoder.fit_transform(trainY)
testY  = label_encoder.transform(testY)

# class names ordered by their integer code (index k is the name of class k)
classes = list(label_encoder.classes_)

# binary (one-hot) encode as a dense array.
# The `sparse` keyword was renamed to `sparse_output` in newer scikit-learn
# releases and later removed; fall back to the old spelling on old installs.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False)

# the encoder expects a 2-D column, hence the reshape to (n_samples, 1)
trainY = onehot_encoder.fit_transform(trainY.reshape(-1, 1))
testY = onehot_encoder.transform(testY.reshape(-1, 1))

### **Data Augmentation**

In [8]:
# Image generator used for on-the-fly augmentation of the training batches:
# random rotation, shifts, shear, zoom and horizontal flips.
aug = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)

## **Define Model and Compile**

In [11]:
# hyperparameters of the run
INIT_LR = 1e-9                          # learning rate handed to the optimizer
EPOCHS = 100
BS = 128                                # batch size
STEP_EPOCH = len(trainX) // BS          # whole batches per epoch
N_BATCH_UPDATES = STEP_EPOCH * EPOCHS   # total batch updates over all epochs

# build the VGG-like CNN for 64x64 inputs with 3 channels, one output per class
model = SmallVGGNet.build(width=64, height=64, depth=3, classes=len(classes))

# project callback, given the maximum learning rate and the number of batch
# updates (presumably it sweeps the rate upwards during training -- confirm in
# keras_callbacks)
lr_screening = LearningRateScreening(max_lr=10, n_batch_updates=N_BATCH_UPDATES)

# compile with plain SGD and binary cross-entropy, tracking accuracy
print("[INFO] training network...")
opt = SGD(learning_rate=INIT_LR)
model.compile(optimizer=opt, loss="binary_crossentropy", metrics=["accuracy"])

[INFO] training network...


## **Train Model**

In [12]:
# Train on augmented batches drawn from the training split. The held-out split
# is passed as-is (no augmentation) for validation, and lr_screening runs as a
# callback.
train_batches = aug.flow(trainX, trainY, batch_size=BS)
H = model.fit(
    x=train_batches,
    steps_per_epoch=STEP_EPOCH,
    epochs=EPOCHS,
    validation_data=(testX, testY),
    callbacks=[lr_screening],
)

Epoch 1/100
1/9 [==>...........................] - ETA: 12s - loss: 0.9982 - accuracy: 0.5078 - learning rate: 9.999999717180685e-10
2/9 [=====>........................] - ETA: 1s - loss: 1.0171 - accuracy: 0.5391  - learning rate: 1.025914353469659e-09
Epoch 2/100
1/9 [==>...........................] - ETA: 2s - loss: 1.0842 - accuracy: 0.5156 - learning rate: 1.2589254083650303e-09
2/9 [=====>........................] - ETA: 1s - loss: 0.9934 - accuracy: 0.5352 - learning rate: 1.291549645010548e-09
Epoch 3/100
1/9 [==>...........................] - ETA: 1s - loss: 1.1189 - accuracy: 0.4412 - learning rate: 1.5848932166662166e-09
2/9 [=====>........................] - ETA: 1s - loss: 1.1238 - accuracy: 0.4522 - learning rate: 1.6259646962168972e-09
Epoch 4/100
1/9 [==>...........................] - ETA: 2s - loss: 1.1134 - accuracy: 0.5312 - learning rate: 1.9952621777008517e-09
2/9 [=====>........................] - ETA: 1s - loss: 1.1109 - accuracy: 0.5195 - learning rate: 2.046968

## **Evaluate the Network**

## **Save the Model**

In [25]:
import plotly.graph_objects as go
import math 
from scipy import signal

# Plot the training loss recorded by the screening callback against the
# learning rate (log-scaled x axis), smoothed with a Savitzky-Golay filter.
smooth_window, smooth_order = 53, 3
recorded_loss = lr_screening.loss_list

# savgol_filter rejects a window longer than the data (default 'interp' mode)
# and a window that is not larger than the polynomial order. Clamp the window
# to the data length, keep it odd, and fall back to the raw curve when too
# few points were recorded (e.g. an interrupted run).
window = min(smooth_window, len(recorded_loss))
if window % 2 == 0:
    window -= 1
if window > smooth_order:
    smoothed_loss = signal.savgol_filter(recorded_loss, window, smooth_order)
else:
    smoothed_loss = recorded_loss

fig = go.Figure()

# Add traces
fig.add_trace(go.Scatter(x=lr_screening.lr_list, y=smoothed_loss, mode='lines', name='Train Loss'))
fig.update_xaxes(type="log")
fig.update_layout(yaxis_title="Loss", xaxis_title="Learning Rate - Log Scale")

fig.show()

In [None]:
# save the model and the list of class names to disk (off unless save=True)
save = False
if save:
  print("[INFO] serializing network and label binarizer...")
  # make sure the output folder exists before anything is written into it
  os.makedirs(output_path, exist_ok=True)
  # NOTE(review): newer Keras releases may reject `save_format` and/or the
  # '.model' suffix -- confirm against the installed version.
  model.save(os.path.join(output_path,'smallvggnet.model'), save_format="h5")
  # the context manager closes the file even if pickling raises
  with open(os.path.join(output_path,'smallvggnet.pickle'), "wb") as f:
    pickle.dump(classes, f)