In [None]:
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# The code in this cell is taken from https://www.kaggle.com/stevengolo/classification-and-localization
import xml.etree.ElementTree as etree
# Directory holding the VOC2012 annotation XML files.
# Built with os.path.join so the separators are right on every platform: the
# former literal '.\VOC2012\Annotations' relied on the invalid escape sequences
# '\V' and '\A' and only resolved as a path on Windows.
ANNOTATION_DIR = os.path.join('.', 'VOC2012', 'Annotations')
def extract_xml_annotation(filename):
    # function gotten from https://www.kaggle.com/stevengolo/classification-and-localization
    """Read one annotation XML file and summarise its contents.

    :param filename: str, path of the XML file to parse
    :return: dict with the keys
        'size'     -- (width, height) as ints,
        'filename' -- text of the top-level <filename> element,
        'objects'  -- list with one single-entry dict per top-level <object>,
                      mapping the object's name to [xmin, ymin, xmax, ymax] as ints
    """
    def _as_int(node):
        # int(float(...)) accepts text such as '12.0' as well as '12'.
        return int(float(node.text))

    tree = etree.parse(filename)
    object_nodes = tree.findall('./object')
    image_size = (_as_int(tree.find('.//width')), _as_int(tree.find('.//height')))
    image_name = tree.find('./filename').text

    corner_paths = ('bndbox/xmin', 'bndbox/ymin', 'bndbox/xmax', 'bndbox/ymax')
    labelled_boxes = []
    for node in object_nodes:
        label = node.find('name').text
        labelled_boxes.append({label: [_as_int(node.find(path)) for path in corner_paths]})

    return {'size': image_size, 'filename': image_name, 'objects': labelled_boxes}

# Collect the annotation of every image that contains exactly one labelled
# object; images with zero or several objects are left out.
annotations = []
for filename in sorted(os.listdir(ANNOTATION_DIR)):
    # Skip stray non-XML entries (e.g. OS metadata files): handing them to the
    # XML parser would raise and abort the whole loop.
    if not filename.lower().endswith('.xml'):
        continue
    annotation = extract_xml_annotation(os.path.join(ANNOTATION_DIR, filename))

    objects = annotation.pop('objects')
    if len(objects) != 1:
        continue

    # Each object is a single-entry dict {class name: [xmin, ymin, xmax, ymax]}.
    (class_name, bbox), = objects[0].items()
    annotation['class'] = class_name
    annotation['bbox'] = bbox
    annotations.append(annotation)

In [None]:
# One row per retained annotation; the dict keys ('size', 'filename', 'class',
# 'bbox') become the columns.
df = pd.DataFrame(annotations)
# Binary label encoding: '1' stands for person, '0' for every other class.
def changeToNotPerson(className):
    """Map a class name to its binary label, returned as a string.

    :param className: class name to encode
    :return: '1' if className equals 'person', otherwise '0'
    """
    return '1' if className == 'person' else '0'
# Encode every class name as '1'/'0' and convert the resulting strings to a
# compact integer column in a single step.
df['class'] = pd.to_numeric(df['class'].map(changeToNotPerson), downcast='integer')
df.head()

In [None]:
# only needs to run once
from tensorflow.keras.applications.efficientnet import preprocess_input

IMAGE_SHAPE = (224, 224)
from tensorflow.keras.preprocessing import image


def _load_scaled_image(path, target_size):
    """Load an image file, resize it and min-max scale it to [0, 1].

    :param path: str, image file to read
    :param target_size: size handed to ``image.load_img``
    :return: float array scaled to [0, 1]; all zeros when every pixel has the
        same value
    """
    pixels = image.img_to_array(image.load_img(path, target_size=target_size))
    lowest, highest = np.min(pixels), np.max(pixels)
    value_range = highest - lowest
    if value_range == 0:
        # A uniform image would otherwise give 0 / 0 = NaN for every pixel.
        return np.zeros_like(pixels)
    # normalize image
    return (pixels - lowest) / value_range


# The earlier version also called preprocess_input(img) here and threw the
# result away. The EfficientNet preprocess_input returns its input unchanged,
# so that call had no effect and was dropped; min-max scaling is the only
# preprocessing, the same one applied to the webcam image at inference time.
# NOTE(review): the backbone used further down is a DenseNet, whose own
# preprocess_input differs -- confirm min-max scaling is the intended input.
images = []
for filename in df['filename']:
    print('reading file ' + filename)
    images.append(_load_scaled_image('./VOC2012/JPEGImages/' + filename, IMAGE_SHAPE))
images = np.array(images)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 15% of the samples as the test set; the fixed random_state makes the
# shuffle, and therefore the split, reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    images,
    np.array(df['class']),
    random_state=42,
    test_size=0.15,
)
# del images

In [None]:
# Initializing and compiling of the model
# DenseNet121 is part of this import because it is the backbone instantiated
# below; without it the cell raises NameError on a fresh kernel.
from tensorflow.keras.applications import EfficientNetB3, MobileNetV2, NASNetMobile, DenseNet169, DenseNet121
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Input, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

# NOTE(review): these three values are not read anywhere in this cell -- the
# head below hard-codes 100 hidden units and 1 output unit, and Adam() is
# created with its default learning rate. They are kept in case other code
# relies on them.
neurons = 64
lr = 0.01
outputs = 1

# ImageNet-pretrained DenseNet121 without its classification head;
# pooling='max' makes the backbone end in a global max pooling layer.
base_model = DenseNet121(include_top=False, weights='imagenet', input_shape=IMAGE_SHAPE + (3,), pooling='max')
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that only added a stray "None" line).
base_model.summary()
# Freeze the backbone so that only the new dense head is trained.
base_model.trainable = False

inputs = Input(shape=IMAGE_SHAPE + (3,))
# training=False calls the backbone in inference mode.
x = base_model(inputs, training=False)
# A Dense classifier with a single unit (binary classification)
x_next = Dense(100, activation='relu')(x)
x_out = Dense(1, activation='sigmoid')(x_next)
model = Model(inputs, x_out)
model.compile(optimizer=Adam(),
              loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Checkpoint callback: monitors val_loss and, with save_best_only=True, only
# overwrites the file when the monitored value improves. The whole model is
# stored, not just its weights.
ModelCheck = ModelCheckpoint(
    'bestModel_checkpoint2.h5',
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    save_weights_only=False,
    verbose=0,
)
# NOTE(review): X_test / y_test double as the validation data here, so the
# checkpoint is selected using the held-out set.
hist = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    callbacks=[ModelCheck],
    epochs=5,
)

In [None]:
# loads best model from the training phase
# The file read here has the same name as the one the ModelCheckpoint callback
# is configured to write ('bestModel_checkpoint2.h5'). `model` is rebound to
# the loaded model, replacing the in-memory one left behind by model.fit.
from tensorflow.keras.models import load_model
model = load_model('bestModel_checkpoint2.h5')

In [None]:
import matplotlib.pyplot as plt
# Accuracy curves taken from the training history. The leading None leaves
# position 0 empty, so the value recorded for the first epoch is drawn at x = 1.
train_curve = [None] + hist.history['accuracy']
val_curve = [None] + hist.history['val_accuracy']

f, ax = plt.subplots()
ax.set_title('Training/Validation acc per Epoch')
ax.set_xlabel('Epoch')
ax.plot(train_curve, '--')
ax.plot(val_curve)
# loc = 0 lets matplotlib pick the best legend position automatically.
ax.legend(['Train acc', 'Validation acc'], loc = 0)
ax.set_ylabel('acc')

In [None]:
# testing with different images from the webcam
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.efficientnet import decode_predictions
import numpy as np
IMAGE_SHAPE=(224,224)
# Load the photo at the network's input size and min-max scale it to [0, 1],
# the same scaling that is applied to the training images.
img = image.img_to_array(image.load_img('WIN_20201015_13_50_45_Pro.jpg', target_size=IMAGE_SHAPE))
img = (img - img.min()) / (img.max() - img.min())
# Add a leading batch axis so the array can be handed to model.predict.
img = img[np.newaxis, ...]
# img = preprocess_input(img)

In [None]:
%%time
# Run the classifier on the prepared image; the %%time cell magic at the top of
# this cell reports how long the call takes.
prediction = model.predict(img)

In [None]:
# Show the raw model output. The network ends in a single sigmoid unit and the
# labels were encoded with 1 = person, so values close to 1 point to "person".
prediction

In [None]:
# converting Keras model into a Tensorflow.js model
# The second argument is the output location for the converted model.
# NOTE(review): the "EN3" prefix in the directory name looks like a leftover
# from an EfficientNetB3 experiment, while the model built in this notebook
# uses a DenseNet121 backbone -- confirm the name is still intended.
from tensorflowjs.converters import save_keras_model
save_keras_model(model, './EN3_PersonNoPerson_classifier_TfJS')