## **Imports**

In [11]:
# import the necessary packages
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img

from google.colab import drive
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pickle
import cv2
import random
import sys
import os


In [12]:
# mount drive folder and import custom modules
drive.mount('/content/drive', force_remount=False)
sys.path.insert(0,'/content/drive/MyDrive/Shark_Classification')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## **Loag VGG Net**

In [13]:
# load the VGG16 network and initialize the label encoder
print("[INFO] loading network...")
model = VGG16(weights="imagenet", include_top=False)

[INFO] loading network...


## **Function to List Files in Directory**

In [14]:
file_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")
keep_labels     = ['great_white_shark','hammerhead_shark']

def list_files(indir=os.getcwd(),valid_extensions=file_extensions,valid_labels=keep_labels):
    for (rootdir,dirs,files) in os.walk(indir):
        for filename in files:
            # determine the file extension of the current file
            ext = filename[filename.rfind("."):].lower()
            
            # check to see if the file is an image and should be processed
            if valid_extensions is None or ext.endswith(valid_extensions):
                
                # construct the path to the image and yield it
                imagePath = os.path.join(rootdir, filename)
                
                # yield the path if the label should not be dropped 
                if imagePath.split(os.path.sep)[-2] in valid_labels:
                    yield imagePath
            
    return

## **Read Files**

In [15]:
dataset_path = os.path.join(sys.path[0],'sharks')
output_path= os.path.join(sys.path[0],"output")
    
#obtain image paths and ramdomize it
image_paths = list(list_files(dataset_path))
random.seed(42)
random.shuffle(image_paths)
    
# initialize data and labels list
data, labels, count = [],[],0

# preparing labels  
for i in image_paths:       
    label = i.split(os.path.sep)[-2]
    labels.append(label)

# print label count
label_list = os.listdir(dataset_path)
for l in label_list: print("label: {} counts: {}".format(l,labels.count(l)))

label: great_white_shark counts: 928
label: mako counts: 0
label: tiger_shark counts: 0
label: hammerhead_shark counts: 744
label: whale_shark counts: 0


## **Data Preprocessing**

### **Train/Test Split**

In [16]:
# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
test_size = 0.25
isplit = int(len(labels)*(1-test_size))

Labels = [labels[:isplit], labels[isplit:]]
Paths  = [image_paths[:isplit],image_paths[isplit:]]
Split  = ["train","val"]


### **Read and Preprocess Image Function**

In [17]:
def read_and_preprocess(path):
  image = load_img(path, target_size=(224, 224))
  image = img_to_array(image)
  image = np.expand_dims(image, axis=0)
  image = preprocess_input(image)
  return image


##**Perform Feature Extraction**

In [18]:
# initialize label encoder and batch size
BS =32
le = None
Features = {}

# loop over the possible dataset splits
for (s,split) in enumerate(Split):
  labels    = Labels[s]
  img_paths = Paths[s]
  Features[split+"X"] = []
  Features[split+"Y"] = []

  # if the label encoder is None, create it
  if le is None:
    le = LabelEncoder()
    le.fit(labels)
	
  # open the output CSV file for writing
  csvPath = os.path.sep.join([output_path,"{}.csv".format(split)])
  csv = open(csvPath, "w")
 
  # loop over the images in batches
  for (b, i) in enumerate(range(0, len(img_paths), BS)):
    
    # extract the batch of images and labels, then initialize the
    # list of actual images that will be passed through the network
    # for feature extraction
    print("[INFO] processing batch {}/{}".format(b + 1, int(np.ceil(len(img_paths) / float(BS)))))
    batchPaths = img_paths[i:i + BS]
    batchLabels = le.transform(labels[i:i + BS])
    
    # loop over the images and labels in the current batch
    # add the preprocessed image to the batch
    batchImages = []
    for imagePath in batchPaths: batchImages.append(read_and_preprocess(imagePath))

    # pass the images through the network and use the outputs as
    # our actual features, then reshape the features into a
    # flattened volume
    batchImages = np.vstack(batchImages)
    features = model.predict(batchImages, batch_size=BS)
    features = features.reshape((features.shape[0], 7 * 7 * 512))

    # loop over the class labels and extracted features
    for (label, vec) in zip(batchLabels, features):

      # append feature vector and labels
      Features[split+"X"].append(vec)
      Features[split+"Y"].append(label)

      # construct a row that exists of the class label and
      # extracted features
      vec = ",".join([str(v) for v in vec])
      csv.write("{},{}\n".format(label, vec))

  # close the CSV file
  csv.close()

[INFO] processing batch 1/40
[INFO] processing batch 2/40
[INFO] processing batch 3/40
[INFO] processing batch 4/40
[INFO] processing batch 5/40
[INFO] processing batch 6/40
[INFO] processing batch 7/40
[INFO] processing batch 8/40
[INFO] processing batch 9/40
[INFO] processing batch 10/40
[INFO] processing batch 11/40
[INFO] processing batch 12/40
[INFO] processing batch 13/40
[INFO] processing batch 14/40
[INFO] processing batch 15/40
[INFO] processing batch 16/40
[INFO] processing batch 17/40
[INFO] processing batch 18/40
[INFO] processing batch 19/40
[INFO] processing batch 20/40
[INFO] processing batch 21/40
[INFO] processing batch 22/40
[INFO] processing batch 23/40
[INFO] processing batch 24/40
[INFO] processing batch 25/40
[INFO] processing batch 26/40
[INFO] processing batch 27/40
[INFO] processing batch 28/40
[INFO] processing batch 29/40
[INFO] processing batch 30/40
[INFO] processing batch 31/40
[INFO] processing batch 32/40
[INFO] processing batch 33/40
[INFO] processing b

##**Save the Label Encoder**

In [19]:
le_path=os.path.join(output_path, "le.cpickle")

# serialize the label encoder to disk
f = open(le_path, "wb")
f.write(pickle.dumps(le))
f.close()

##**Train a Logistig Regression on features**

In [20]:
trainX = np.array(Features["trainX"])
trainY = np.array(Features["trainY"])
testX  = np.array(Features["valX"])
testY  = np.array(Features["valY"])

# train the model
print("[INFO] training model...")
model = LogisticRegression(solver="lbfgs", multi_class="auto",max_iter=150)
model.fit(trainX, trainY)

# evaluate the model
print("[INFO] evaluating...")
preds = model.predict(testX)
print(classification_report(testY, preds, target_names=le.classes_))

[INFO] training model...
[INFO] evaluating...
                   precision    recall  f1-score   support

great_white_shark       1.00      0.97      0.98       225
 hammerhead_shark       0.97      0.99      0.98       193

         accuracy                           0.98       418
        macro avg       0.98      0.98      0.98       418
     weighted avg       0.98      0.98      0.98       418



##**Save the model**


In [21]:
save=True
modelPath = os.path.sep.join([output_path,"vggnet_transfer_model.cpickle"])

if save:
  # serialize the model to disk
  print("[INFO] saving model...")
  f = open(modelPath, "wb")
  f.write(pickle.dumps(model))
  f.close()

[INFO] saving model...
