# Transfer learning with a pretrained MobileNetV2

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import os

import numpy as np

import PIL

import scipy

import pickle

import pandas as pd

from skimage import transform

from PIL import Image

import matplotlib.pyplot as plt

import openpyxl

import time

In [2]:
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf

from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Dropout, LSTM
# Create a TF1-compat interactive session whose GPU options have
# `allow_growth` enabled, so GPU memory is requested as needed instead of
# being reserved up front.
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

# Short alias for the Keras API bundled with TensorFlow.
keras = tf.keras

In [3]:
# Labels that are only used to build the name of the exported features CSV
# ("<NET_TO_PREDICT>-<CSV_NAME>-<timestamp>.csv") at the end of the notebook.
NET_TO_PREDICT = "MBV2"
CSV_NAME = "TrainedMB"

In [4]:
def path_join(dirname, filenames):
    """Return a list with `dirname` joined in front of each entry of `filenames`.

    The result keeps the order of `filenames`; an empty input gives an
    empty list.
    """
    joined = []
    for name in filenames:
        joined.append(os.path.join(dirname, name))
    return joined

In [5]:
# Import a function from sklearn to calculate the confusion-matrix.
from sklearn.metrics import confusion_matrix


def print_confusion_matrix(cls_pred):
    """Print the confusion matrix of the test-set followed by a class legend.

    `cls_pred` holds the predicted class-number for every image in the
    test-set. The true classes are read from the global `cls_test` and the
    class names from the global `class_names`; neither is defined in this
    function.
    """
    # sklearn builds the matrix from the true and the predicted classes.
    matrix = confusion_matrix(y_true=cls_test, y_pred=cls_pred)

    print("Confusion matrix:")
    print(matrix)

    # Legend: one "(index) name" line per class, for easy reference.
    for number, name in enumerate(class_names):
        print("({0}) {1}".format(number, name))

In [6]:
def plot_example_errors(cls_pred):
    """Plot up to nine test-set images that were classified incorrectly.

    `cls_pred` holds the predicted class-number for every image in the
    test-set. The true classes come from the global `cls_test` and the
    image paths from the global `image_paths_test`.
    """
    # Boolean mask: True where the prediction differs from the true class.
    mismatch = (cls_pred != cls_test)

    # Paths, predicted classes and true classes of the mis-classified images.
    wrong_paths = np.array(image_paths_test)[mismatch]
    wrong_pred = cls_pred[mismatch]
    wrong_true = cls_test[mismatch]

    # Only the first 9 images are loaded from disk, so the class arrays are
    # sliced to the same length while the loaded images are passed as they are.
    first_images = load_images(image_paths=wrong_paths[0:9])
    plot_images(images=first_images,
                cls_true=wrong_true[0:9],
                cls_pred=wrong_pred[0:9])

In [7]:
def example_errors():
    """Predict the whole test-set, then show example errors and the confusion matrix.

    Relies on the globals `generator_test`, `steps_test` and `new_model`.
    NOTE(review): none of these is defined in the visible part of this
    notebook - confirm they exist before calling this function.
    """
    # The Keras generator loops forever and remembers its position, so it
    # has to be rewound first. Otherwise it could start in the middle of the
    # test-set and the predictions could not be matched to the input images.
    generator_test.reset()

    # One row of class scores per test image.
    scores = new_model.predict_generator(generator_test,
                                         steps=steps_test)

    # The class-number of each image is the position of its highest score.
    predicted_classes = np.argmax(scores, axis=1)

    # Show some mis-classified images, then the confusion matrix.
    plot_example_errors(predicted_classes)
    print_confusion_matrix(predicted_classes)

In [8]:
def load_images(image_paths):
    """Read every path in `image_paths` with `plt.imread` and return a numpy array.

    The images are stacked with `np.asarray` in the order of `image_paths`.
    """
    loaded = []
    for path in image_paths:
        # Read one image from disk.
        loaded.append(plt.imread(path))

    return np.asarray(loaded)

In [9]:
def plot_training_history(history):
    """Plot accuracy and loss for the training-set and the test-set in one figure.

    `history.history` must contain the keys 'categorical_accuracy', 'loss',
    'val_categorical_accuracy' and 'val_loss'; a missing key raises
    KeyError before anything is drawn.
    """
    record = history.history

    # Training-set curves.
    train_acc = record['categorical_accuracy']
    train_loss = record['loss']

    # Validation curves (the test-set is used as validation-set here).
    test_acc = record['val_categorical_accuracy']
    test_loss = record['val_loss']

    # Training-set in blue: accuracy as a solid line, loss as dots.
    plt.plot(train_acc, linestyle='-', color='b', label='Training Acc.')
    plt.plot(train_loss, 'o', color='b', label='Training Loss')

    # Test-set in red: accuracy as a dashed line, loss as dots.
    plt.plot(test_acc, linestyle='--', color='r', label='Test Acc.')
    plt.plot(test_loss, 'o', color='r', label='Test Loss')

    plt.title('Training and Test Accuracy')
    plt.legend()

    # Ensure the plot shows correctly.
    plt.show()

def plot_images(images, cls_true, cls_pred=None, smooth=True):
    """Show up to nine images in a 3x3 grid, labelled with their classes.

    `images` and `cls_true` must have the same length. When `cls_pred` is
    given, the predicted class name is shown below the true one. Class
    numbers are translated to names through the global `class_names`.
    `smooth` selects 'spline16' interpolation instead of 'nearest'.
    """
    assert len(images) == len(cls_true)

    with_predictions = cls_pred is not None

    # 3x3 grid; two-line labels (true + predicted) need more vertical room.
    fig, axes = plt.subplots(3, 3)
    fig.subplots_adjust(hspace=0.6 if with_predictions else 0.3, wspace=0.3)

    interpolation = 'spline16' if smooth else 'nearest'

    for position, axis in enumerate(axes.flat):
        # There may be fewer than 9 images; surplus cells stay empty.
        if position < len(images):
            axis.imshow(images[position],
                        interpolation=interpolation)

            true_name = class_names[cls_true[position]]
            if with_predictions:
                pred_name = class_names[cls_pred[position]]
                label = "True: {0}\nPred: {1}".format(true_name, pred_name)
            else:
                label = "True: {0}".format(true_name)

            # The classes are shown as the label on the x-axis.
            axis.set_xlabel(label)

        # No ticks on any cell, used or not.
        axis.set_xticks([])
        axis.set_yticks([])

    # Ensure the plot is shown correctly with multiple plots
    # in a single Notebook cell.
    plt.show()

In [10]:
IMG_SIZE = 224 # All images will be resized to 224x224
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)
# True: take the convolutional base from the saved model on disk.
# False: use a fresh MobileNetV2 with ImageNet weights.
IS_TRAINED_MODEL = True

if(IS_TRAINED_MODEL):
    loaded_model = tf.keras.models.load_model("models/MobileNetV2-50.50-pervideo-ud-1585845356")
    # NOTE(review): assumes the first layer of the saved model is the
    # MobileNetV2 base - confirm against how that model was built.
    base_model = loaded_model.layers[0]
else:
    # include_top=False leaves out the ImageNet classification head.
    base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                                   include_top=False,
                                                   weights='imagenet')
    
# Feature extractor used in the rest of the notebook: the base model followed
# by global average pooling.
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
model = tf.keras.Sequential([base_model,global_average_layer])

In [11]:
def predict(image_path):
    """Display the image at `image_path` and run `base_model` on it.

    The image is resized to the spatial size given by the global
    `IMG_SHAPE` (the previous code used a name, `input_shape`, that is not
    defined anywhere in this notebook), shown with matplotlib, and passed
    to `base_model.predict` as a batch of one.

    Returns the array produced by `base_model.predict`. Earlier the result
    was computed and then discarded.

    NOTE(review): unlike `load()`, the pixel values are not divided by 255
    here - confirm which input range `base_model` expects.
    """
    # PIL's resize takes a (width, height) pair; both are IMG_SIZE here.
    target_size = IMG_SHAPE[:2]

    # Load and resize the image using PIL.
    with PIL.Image.open(image_path) as img:
        img_resized = img.resize(target_size, PIL.Image.LANCZOS)

    # Plot the image.
    plt.imshow(img_resized)
    plt.show()

    # Convert the PIL image to a numpy-array with a leading batch axis.
    img_array = np.expand_dims(np.array(img_resized), axis=0)

    # Run the base model on the single-image batch.
    pred = base_model.predict(img_array)
    return pred

In [12]:
# Compile the feature-extraction model. `learning_rate` is the current name of
# the Adam step-size argument; `lr` is only a deprecated alias.
optimizer = Adam(learning_rate=1e-5)
loss = 'categorical_crossentropy'
metrics = ['categorical_accuracy']
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

In [13]:
def load(filename):
    """Load an image file as a model-ready batch of one.

    The image is converted to RGB, scaled to floats in [0, 1] by dividing
    by 255, resized to `IMG_SHAPE` with skimage, and given a leading batch
    axis, so the result has shape `(1,) + IMG_SHAPE`.

    The RGB conversion guarantees three channels before resizing. Without
    it, an image with an alpha channel, a palette or a single grey channel
    would have its channel axis interpolated by `transform.resize` to reach
    the three channels of `IMG_SHAPE`.
    """
    # The context manager closes the file; `convert` returns an independent,
    # fully loaded image that stays usable afterwards.
    with Image.open(filename) as image_file:
        rgb_image = image_file.convert('RGB')

    np_image = np.array(rgb_image).astype('float32')/255
    np_image = transform.resize(np_image, IMG_SHAPE)
    return np.expand_dims(np_image, axis=0)

In [14]:
def generateDFprediction(filename, pred):
    """Wrap a prediction in a DataFrame whose first column is the file name.

    `pred` becomes the data of the frame; a 'file' column holding
    `filename` on every row is added and then moved to the front.
    """
    frame = pd.DataFrame(data=pred)
    frame['file'] = filename

    # Rotate the column order by one so the last column comes first.
    ordered = frame.columns.tolist()
    ordered.insert(0, ordered.pop())
    return frame[ordered]

In [15]:
def loadOutputVectoresFile(checkfiledir, vectordir, vectorfile):
    """Read a space-separated vector file into a labelled DataFrame.

    The file at `vectordir + vectorfile` has no header; its columns are
    named xmin, xmax, ymin, ymax, zmin, zmax. Two columns are derived per
    row: 'file' from `framename(checkfiledir, row, vectorfile)` and
    'collision' from `hascollision(row)` (both helpers are defined outside
    this function). Rows whose 'file' value is "error" are dropped.

    Returns the frame with 'file' and 'collision' as the first two columns.
    """
    column_names = ['xmin','xmax', 'ymin', 'ymax', 'zmin', 'zmax']
    frame = pd.read_csv(vectordir + vectorfile, sep=" ", header=None, names=column_names)

    # Derived columns, computed row by row.
    frame['file'] = frame.apply(lambda row: framename(checkfiledir, row, vectorfile), axis=1)
    frame['collision'] = frame.apply(lambda row: hascollision(row), axis=1)

    # Discard rows that were flagged with the "error" marker.
    frame = frame[frame.file != "error"]

    # Move the last two columns to the start.
    ordered = frame.columns.tolist()
    ordered = ordered[-2:] + ordered[:-2]
    return frame[ordered]
    
    


## Generation of the Features starts here

In [16]:
# Number of frames before a collision that are considered when dropping
# frames. Only referenced by the disabled trimming step inside the loop below.
PRE_COL = 30

dataframes_path = '/tf/notebooks/collision_avoidance/data/2020-03-10/dataframes/'  # directory of the Excel files with the labels
imagedir = '/tf/notebooks/collision_avoidance/data/2020-03-10/images/'  # directory of the images

# Read every Excel file in the directory. The frames are gathered in a list
# and concatenated once after the loop: calling pd.concat inside the loop
# copies all previously read rows on every iteration.
label_frames = []
for file in os.listdir(dataframes_path):
    filename = os.fsdecode(file)  # name of the current Excel file
    df1 = pd.read_excel(dataframes_path + filename)
    # Disabled trimming step (kept for reference):
    #for index, row in df1.iterrows(): #iterator of all the rows in the current dataframe excel
    #    if(row['collision'] == 1): #finds out which frame has the first collision
    #        firstcol = index #stores the index that corresponds to the first collision
    #        break
    #droptarget = df1.shape[0]-(df1.shape[0]-firstcol+PRE_COL)-int((df1.shape[0]-firstcol+PRE_COL)*1.1) #calculates the frame that we are using to drop non important frames
    #if(droptarget > 0):
    #    df1 = df1.drop(df1.index[0:droptarget]) #drops the non important non collision frames
    label_frames.append(df1)

# df2 holds the label rows of all files, ordered by and indexed on 'file'.
df2 = pd.concat(label_frames)
df2 = df2.sort_values(by=['file'], ascending=True)
df2 = df2.set_index('file')

In [17]:
# Number of rows in df2, i.e. the number of images to analyse.
total_images = df2.shape[0]

# Run the model on every image named in the index of df2. The one-row frames
# are gathered in a list and concatenated once after the loop: calling
# pd.concat inside the loop copies all earlier rows on every iteration.
prediction_frames = []
for image_name in df2.index:
    image = load(imagedir + image_name + '.png')  # batch of one image
    pred = model.predict(image)  # model output for this image
    prediction_frames.append(generateDFprediction(image_name, pred))

# df3 holds the model outputs of all images, indexed on 'file'.
df3 = pd.concat(prediction_frames)
df3 = df3.set_index('file')

In [18]:
# Put the label columns (df2) and the per-image model outputs (df3) side by
# side. Both frames are indexed on 'file', so the rows are matched by image name.
finalDF = pd.concat([df2,df3], axis=1)

In [19]:
# File name: network label, CSV label and the current Unix time in seconds.
NAME = f"{NET_TO_PREDICT}-{CSV_NAME}-{int(time.time())}"
output_path = f"features_csv/{NAME}.csv"

# Create the output directory if needed, so the export cannot fail on a
# missing folder after the long feature-extraction run.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Write the combined labels and features, then report where they were saved.
finalDF.to_csv(output_path)
print(output_path)

features_csv/MBV2-TrainedMB-1586455938.csv


In [21]:
# Scratch calculation: 95% of 18826.
# NOTE(review): 18826 is presumably the number of images/rows - confirm.
18826 * 0.95

17884.7