In [None]:
# TODO: log total run time, dataset info
# TODO: show some images and histogram from test sequence

In [None]:
import numpy as np
import os
from random import shuffle
import time
import csv

import matplotlib.pyplot as plt
%matplotlib inline

from keras import optimizers, losses, activations, applications
from keras.models import Model, Sequential
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau, TensorBoard
from keras.layers import Convolution2D, Dense, Input, Flatten, Dropout, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D, Concatenate
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.inception_v3 import preprocess_input

from skimage.transform import resize
from sklearn.metrics import classification_report

import tensorflow as tf
from tensorflow.python.keras.callbacks import TensorBoard

# used to download pretrained Inception V3 Keras Model
# NOTE(review): the assignment below swaps the standard library's default HTTPS
# context factory for the unverified one, so HTTPS requests made through it in
# this process skip certificate validation. This is a security trade-off --
# presumably a workaround for a broken local CA store; confirm and prefer
# fixing the certificates instead.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context 

# Dataset root directory. The alternatives are kept for reference; enable exactly one.
#DATA_DIR = "/home/spa/coin-vision/ssd-data/all-68-20190123-keras-no-noise/"
#DATA_DIR = "/home/spa/coin-vision/ssd-data/micro-25-20190204/"
DATA_DIR = "/home/spa/coin-vision/ssd-data/mini-400-20190205/"

# Image height / width used for the generators and the model input.
ROWS = COLS = 299

# Training length and batch sizes.
train_epochs = 5
train_batch_size = 64
validation_batch_size = 100

# Output files live inside the dataset directory; the model file name carries
# a timestamp taken when this cell runs.
MODEL_LABELS_FILE_PATH = os.path.join(DATA_DIR, "labels.txt")
MODEL_FILE_PATH = os.path.join(
    DATA_DIR,
    "inception_v3_" + time.strftime("%Y%m%d-%H%M%S") + ".hdf5",
)

# TensorBoard callback writing its event files under the dataset directory.
tensorboard_log_dir = os.path.join(DATA_DIR, 'logs/')
tensorboard = TensorBoard(log_dir=tensorboard_log_dir)

print('tensorflow version', tf.__version__)
print('TensorBoard log dir: ', tensorboard_log_dir)
print('MODEL_FILE_PATH: ', MODEL_FILE_PATH)




In [None]:
# Recursively collect the path of every file found below DATA_DIR.
list_paths = [
    dirpath + os.sep + filename
    for dirpath, _dirnames, filenames in os.walk(DATA_DIR)
    for filename in filenames
]

In [None]:
# Split the collected file paths by the sub-directory they live in.
list_train = [filepath for filepath in list_paths if "labeled-images-tr/" in filepath]
list_test = [filepath for filepath in list_paths if "labeled-images-tst/" in filepath]

# Number of validation batches needed to see every test file once.
# Keras expects an integer number of steps; rounding up keeps the final,
# partially filled batch instead of producing a float (Python 3) or silently
# dropping the remainder (Python 2 integer division).
validation_steps = int(np.ceil(len(list_test) / float(validation_batch_size)))

print('train files: ', len(list_train))
print('test files: ', len(list_test))
print('validation steps: ', validation_steps)

In [None]:
# Augmentation settings for the training images: small shifts, rotation, zoom
# and shear, no flips. preprocess_input is applied to every image.
train_augmentation = dict(
    vertical_flip=False,
    horizontal_flip=False,
    height_shift_range=0.1,
    width_shift_range=0.1,
    rotation_range=20,
    zoom_range=0.2,
    shear_range=0.2,
    preprocessing_function=preprocess_input,
)
train_idg = ImageDataGenerator(**train_augmentation)

# Test images are only preprocessed, never augmented.
test_idg = ImageDataGenerator(preprocessing_function=preprocess_input)

# Directory-backed batch generators for the train and test folders.
train_gen = train_idg.flow_from_directory(DATA_DIR + 'labeled-images-tr/',
                                          target_size=(ROWS, COLS),
                                          batch_size=train_batch_size)
test_gen = test_idg.flow_from_directory(DATA_DIR + 'labeled-images-tst/',
                                        target_size=(ROWS, COLS),
                                        batch_size=validation_batch_size,
                                        shuffle=True)


In [None]:
#labels

In [None]:
# Persist the class-name -> class-index mapping (it is used during prediction).
# NOTE(review): the mapping is taken from the test generator; this assumes the
# train and test directories contain the same class folders -- confirm.
with open(MODEL_LABELS_FILE_PATH, 'w') as labels_out:
    csv.writer(labels_out).writerows(
        [class_name, str(class_index)]
        for class_name, class_index in test_gen.class_indices.items()
    )

# Read the mapping back from disk so that training and prediction rely on
# exactly the same label file.
with open(MODEL_LABELS_FILE_PATH, mode='r') as labels_in:
    labels_by_name = dict(
        (row[0], int(float(row[1]))) for row in csv.reader(labels_in)
    )

# Downstream cells consume `labels` as (class_name, class_index) pairs.
labels = labels_by_name.items()

In [None]:
# Draw one batch from the training generator; the next cells use it to display
# sample images and a pixel-value histogram.
x, y = next(train_gen)

In [None]:
# Show some images from the train set.
# Rescale for better view: [-1:1] => [0:255]
img_to_plot_uint8 = (127.5 * (x + 1)).astype(np.uint8)
fig = plt.figure(figsize=(20, 20))
columns = 4
rows = 5
# Never index past the end of the batch (it may hold fewer images than the grid).
n_images = min(columns * rows, len(img_to_plot_uint8))
for i in range(n_images):
    # Subplot positions are 1-based, image indices are 0-based; the original
    # loop used the 1-based position as image index and skipped image 0.
    fig.add_subplot(rows, columns, i + 1)
    plt.imshow(img_to_plot_uint8[i])
plt.show()

In [None]:
# Show the pixel-value histogram of one channel (index 1) of the first image
# in the sampled training batch.
print('show histogram for a train image')
# plt.hist treats a 2-D input as one dataset per column, so the channel must
# be flattened to obtain a single histogram over all of its pixels.
n, bins, patches = plt.hist(x[0, :, :, 1].ravel(), 25, density=1, facecolor='green', alpha=0.75)
plt.show()

In [None]:

input_shape = (ROWS, COLS, 3)
nclass = len(labels)

# InceptionV3 convolutional base with ImageNet weights and without its
# original classification head; all of its layers stay trainable.
base_model = applications.InceptionV3(include_top=False,
                                      weights='imagenet',
                                      input_shape=input_shape)
base_model.trainable = True

# Classifier: base -> global average pooling -> softmax over the label set.
# (A Dropout(0.3) layer before the Dense layer is currently disabled.)
add_model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(nclass, activation='softmax'),
])

model = add_model

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy', 'top_k_categorical_accuracy'])
model.summary()

In [None]:
# LOAD MODEL (disabled; uncomment with a valid weights path to load saved weights first)
#model.load_weights(file_path)

# Keras expects integer step counts. Round up so the last, partially filled
# batch of every epoch is still used instead of passing a float (Python 3) or
# dropping the remainder (Python 2 integer division).
tr_steps_per_epoch = int(np.ceil(len(list_train) / float(train_batch_size)))
print('trainig steps per epoc: ', tr_steps_per_epoch)

# Save the model after every epoch (save_best_only=False) to MODEL_FILE_PATH.
# NOTE(review): both callbacks monitor 'acc', which looks like the training
# accuracy rather than a validation metric -- confirm this is intended.
checkpoint = ModelCheckpoint(MODEL_FILE_PATH, monitor='acc', verbose=1, save_best_only=False, mode='max')

early = EarlyStopping(monitor="acc", mode="max", patience=5)

callbacks_list = [checkpoint, early, tensorboard]

history = model.fit_generator(train_gen,
                              epochs=train_epochs,
                              steps_per_epoch=tr_steps_per_epoch,
                              shuffle=True,
                              verbose=True,
                              validation_data=test_gen,
                              # normalised to an integer in case it was computed as a float
                              validation_steps=int(np.ceil(validation_steps)),
                              callbacks=callbacks_list)

In [None]:
# RELOAD MODEL from the file (to make sure we use the same model for test/predictions)
# MODEL_FILE_PATH is the path given to ModelCheckpoint in the training cell; with
# save_best_only=False that file is rewritten after every epoch, so the weights
# loaded here are the ones saved after the most recent epoch.
model.load_weights(MODEL_FILE_PATH)


In [None]:
# Evaluation generator over the same test directory (and the same
# ImageDataGenerator, test_idg) as test_gen. It differs from test_gen in two
# ways: class_mode='binary' (test_gen leaves class_mode at its default) and
# shuffle=False, so batches follow the generator's file order.
final_test_options = dict(
    target_size=(ROWS, COLS),
    batch_size=100,
    shuffle=False,
    class_mode='binary',
)
test_gen_final = test_idg.flow_from_directory(DATA_DIR + 'labeled-images-tst/',
                                              **final_test_options)


In [None]:
# Run the model over the test set exactly once and collect predicted / true
# class names for the classification report.

# Derive the number of batches from the generator instead of a hard-coded
# constant: the generator cycles endlessly, so a step count unrelated to the
# dataset size would evaluate samples repeatedly (or miss some of them).
steps = int(np.ceil(test_gen_final.samples / float(test_gen_final.batch_size)))

# Start from the first batch even when this cell is re-run.
test_gen_final.reset()

# class index -> class name (loop-invariant, so built once up front)
label_index = {v: k for k, v in labels}

predicts_label_acc = []
y_true_label_acc = []

# TODO: show some images and histogram from test sequence 

for step in range(steps):
    x_test, y_true = next(test_gen_final)
    predicts = model.predict_classes(x_test)
    predicts_label_acc.extend(label_index[int(p)] for p in predicts)
    # the generator yields the class indices as floats; cast before the lookup
    y_true_label_acc.extend(label_index[int(t)] for t in y_true)
    print("done ", step, " from ", steps)



In [None]:
# classification_report expects (y_true, y_pred); passing them the other way
# round swaps precision and recall in the printed table.
print(classification_report(y_true_label_acc, predicts_label_acc))


In [None]:
# Number of files found under the test directory (shown as the cell output).
len(list_test)