In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
import sklearn

from keras.models import Sequential
from keras import optimizers
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.callbacks import EarlyStopping
from keras import backend as K
from sklearn.metrics import roc_auc_score, roc_curve, auc, r2_score, f1_score
import matplotlib.pyplot as plt

from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input

from sklearn.preprocessing import StandardScaler, MinMaxScaler

Using TensorFlow backend.


In [None]:
# Reproducibility setup: pin the Python-level sources of randomness.
import os
import random

# One seed shared by every generator below (each stage could use its own value).
seed_value = 42

# Built-in `random` module generator.
random.seed(seed_value)

# NumPy's global pseudo-random generator (used later by np.random.shuffle).
np.random.seed(seed_value)

# Hash seed, exported through the environment as a string.
# NOTE(review): this is written after the interpreter has started -- confirm
# it still has the intended effect for this kernel.
os.environ['PYTHONHASHSEED'] = str(seed_value)

# NOTE(review): no TensorFlow/Keras seed is set in this cell, so weight
# initialisation and dropout are presumably still unseeded -- confirm.

In [2]:
import time, sys
from IPython.display import clear_output

def update_progress(progress):
    """Redraw a 20-character text progress bar in the cell output.

    Parameters
    ----------
    progress : real number
        Completed fraction. Values below 0 are shown as 0 % and values of 1
        or more as 100 %. Any real scalar is accepted (Python ``int`` /
        ``float`` / ``bool`` as well as NumPy scalars). Non-numeric input and
        NaN are shown as 0 %.

    Side effects
    ------------
    Clears the current cell output (``clear_output(wait=True)``) and prints
    a line of the form ``Extraction: [#####---------------] 25.0%``.
    """
    import numbers  # local import keeps the cell self-contained

    bar_length = 20

    # Accept every real scalar type, not only exact int/float instances;
    # previously e.g. np.float32(0.5) was silently displayed as 0 %.
    if isinstance(progress, numbers.Real):
        progress = float(progress)
    else:
        progress = 0.0

    # NaN compares unequal to itself; show it as 0 % instead of failing in round().
    if progress != progress:
        progress = 0.0

    # Clamp to the displayable range [0, 1].
    if progress < 0:
        progress = 0.0
    if progress >= 1:
        progress = 1.0

    block = int(round(bar_length * progress))

    clear_output(wait=True)

    text = "Extraction: [{0}] {1:.1f}%".format("#" * block + "-" * (bar_length - block), progress * 100)
    print(text)

In [3]:
import glob
# Number of images to process; the extraction loop reads food/00000.jpg .. food/09999.jpg.
number_of_elements = 10000  
    
# Feature extractor: InceptionResNetV2 without its classification head
# (include_top=False), loaded with ImageNet weights, for 300x400 RGB inputs.
inputs = keras.Input(shape=(300, 400, 3))
base_model = keras.applications.InceptionResNetV2(weights='imagenet', include_top=False, input_shape=(300, 400, 3))
x = base_model(inputs)
# Average over the spatial grid so each image yields one 1536-value vector
# (see the model summary printed below).
x = keras.layers.GlobalAveragePooling2D()(x)
model = keras.Model(inputs,x)

model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 300, 400, 3)       0         
_________________________________________________________________
inception_resnet_v2 (Model)  (None, 8, 11, 1536)       54336736  
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1536)              0         
Total params: 54,336,736
Trainable params: 54,276,192
Non-trainable params: 60,544
_________________________________________________________________


In [4]:
# One row per image, one column per pooled feature (1536 = width of the
# extractor output shown in the model summary). Filled by the extraction loop.
features_db = np.zeros([number_of_elements,1536])

In [None]:
# Push every image through the pooled backbone, one at a time, and store the
# resulting descriptor in the matching row of `features_db`.
for i in range(number_of_elements):
    # Files are named by their zero-padded five-digit index.
    img_name = str(i).zfill(5)
    img_path = 'food/' + img_name + '.jpg'

    # Load at the network's input resolution and add a leading batch axis.
    img = image.load_img(img_path, target_size=(300, 400))
    x = np.expand_dims(image.img_to_array(img), axis=0)
    x = preprocess_input(x)

    features = model.predict(x).astype(np.float32)
    features_db[i, :] = features.reshape(1, 1536)

    update_progress(i / number_of_elements)

# Force the bar to 100 % once the loop has finished.
update_progress(1)

Extraction: [#######-------------] 32.7%


In [None]:
# The extractor is not needed after the features are computed; drop the
# reference to it, then sanity-check the size of the feature table.
del model
print(features_db.shape)

In [None]:
# Persist the feature table as comma-separated text (one image per row).
np.savetxt('features_db_InceptionResNetV2.csv', features_db, delimiter=',', comments = '')
# The features database has been saved so that each time we can simply load the database instead 
# of extracting it every time

In [None]:
# Reload the cached feature table (as float32) instead of re-running the extraction.
features_db = np.loadtxt('features_db_InceptionResNetV2.csv', delimiter = ',', dtype = np.float32)
print(features_db.shape)

In [None]:
# Space-separated triplets of image indices, one triplet per row, cast to int.
# NOTE(review): presumably each row is (anchor, closer image, farther image) -- confirm against the task description.
Train_index = np.loadtxt('train_triplets.txt', delimiter = ' ').astype(int)
print(Train_index.shape)

In [None]:
# The first `original_train_num` triplets start the training set; the split
# loop further down assigns the remaining triplets.
original_train_num = 500

Training_set = Train_index[0:original_train_num,:]
# Sorted unique image ids referenced by the current training triplets.
image_exist = np.unique(Training_set)

In [None]:
# Inspect the initial training split: number of distinct images, their ids,
# and the shape of the triplet array.
print(image_exist.shape)
print(image_exist)
print(Training_set.shape)

In [None]:
# Start with an empty (0, 3) validation array; np.array([]) is float64, so
# this array holds floats even after integer triplets are appended to it.
Validation_set = np.array([]).reshape(0,3)
# Unique image ids used by validation triplets (empty at this point).
image_val_exist = np.unique(Validation_set)
print(image_val_exist)

In [None]:
# Assign each remaining triplet to the training or the validation split so
# that the two splits never share an image:
#   * shares an image with training and none with validation -> training
#   * shares no image with training                          -> validation
#   * shares images with both                                -> dropped
#
# Membership is tracked in Python sets and the arrays are extended once at the
# end. The earlier version called np.append / np.unique on the whole growing
# array for every row, which made the loop quadratic in the number of triplets.
train_images = set(image_exist.tolist())
val_images = set(image_val_exist.tolist())
new_train_rows = []
new_val_rows = []

remaining = Train_index.shape[0] - original_train_num

for i in range(original_train_num, Train_index.shape[0]):
    update_progress((i - original_train_num) / remaining)

    triplet = Train_index[i, :]
    ids = triplet.tolist()

    # Both tests are evaluated before either split is modified.
    in_train = any(image_id in train_images for image_id in ids)
    in_val = any(image_id in val_images for image_id in ids)

    if in_train:
        if not in_val:
            new_train_rows.append(triplet)
            train_images.update(ids)
    else:
        new_val_rows.append(triplet)
        val_images.update(ids)

# Extend the arrays only when there is something to add, so an empty batch
# cannot change the dtype of the existing array.
if new_train_rows:
    Training_set = np.append(Training_set, np.array(new_train_rows).reshape(-1, 3), axis=0)
if new_val_rows:
    Validation_set = np.append(Validation_set, np.array(new_val_rows).reshape(-1, 3), axis=0)

# Keep the id arrays in sync with the final splits (used by later cells).
image_exist = np.unique(Training_set)
image_val_exist = np.unique(Validation_set)

update_progress(1)

In [None]:
# Report the size of each split and the number of distinct validation images.
print('Validation')
print(Validation_set.shape)
image_val_exist = np.unique(Validation_set)
print(image_val_exist.shape)

print('Training')
print(Training_set.shape)
# Flags validation image ids that also occur in the training split.
mask = np.isin(image_val_exist, image_exist)

# Number of images shared by both splits.
print(np.count_nonzero(mask))

In [None]:
# Shuffle the triplet rows of each split in place. This runs before the
# second half of each split gets its last two columns swapped, so which rows
# end up swapped depends on this shuffle.
np.random.shuffle(Training_set)
np.random.shuffle(Validation_set)

In [None]:
# Training index table: the first half of the rows keeps the original column
# order, the second half has columns 1 and 2 exchanged.
half_train = Training_set.shape[0] // 2

data_train_index_X = np.zeros([Training_set.shape[0], 3])
data_train_index_X[:half_train, :] = Training_set[:half_train, :]
data_train_index_X[half_train:, :] = Training_set[half_train:, [0, 2, 1]]

In [None]:
# Validation index table: the first half of the rows keeps the original column
# order, the second half has columns 1 and 2 exchanged.
half_val = Validation_set.shape[0] // 2

data_val_index_X = np.zeros([Validation_set.shape[0], 3])
data_val_index_X[:half_val, :] = Validation_set[:half_val, :]
data_val_index_X[half_val:, :] = Validation_set[half_val:, [0, 2, 1]]

In [None]:
# Training labels: 1 for the first half of the rows (original column order),
# 0 for the second half (columns 1 and 2 swapped).
data_train_label_Y = np.zeros(Training_set.shape[0])
data_train_label_Y[:Training_set.shape[0] // 2] = 1

print(data_train_label_Y.shape)
print(data_train_label_Y)

In [None]:
# Validation labels: 1 for the first half of the rows (original column order),
# 0 for the second half (columns 1 and 2 swapped).
data_val_label_Y = np.zeros(Validation_set.shape[0])
data_val_label_Y[:Validation_set.shape[0] // 2] = 1

print(data_val_label_Y.shape)
print(data_val_label_Y)

In [None]:
# Look up the feature vector of every image referenced by a training triplet.
# Result layout: (triplet, position within triplet, feature).
features_data_train = np.zeros([data_train_index_X.shape[0], 3, features_db.shape[1]])

# The index table stores floats; convert to integers once for indexing.
train_rows = data_train_index_X.astype(int)
for slot in range(3):
    features_data_train[:, slot, :] = features_db[train_rows[:, slot], :]

print(features_data_train.shape)

In [None]:
# Look up the feature vector of every image referenced by a validation triplet.
# Result layout: (triplet, position within triplet, feature).
features_data_val = np.zeros([data_val_index_X.shape[0], 3, features_db.shape[1]])

# The index table stores floats; convert to integers once for indexing.
val_rows = data_val_index_X.astype(int)
for slot in range(3):
    features_data_val[:, slot, :] = features_db[val_rows[:, slot], :]

print(features_data_val.shape)

In [None]:
# The per-triplet feature arrays have been built; drop the reference to the
# full table. It is reloaded from disk later for the test triplets.
del features_db

In [None]:
def recall_m(y_true, y_pred):
    """Recall of rounded predictions, computed with Keras backend ops.

    Both tensors are clipped to [0, 1] and rounded, so a prediction counts as
    positive once it rounds to 1. `K.epsilon()` in the denominator avoids a
    division by zero when there are no positive labels.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_positives = K.round(K.clip(y_true, 0, 1))
    return K.sum(hits) / (K.sum(actual_positives) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision of rounded predictions, computed with Keras backend ops.

    Both tensors are clipped to [0, 1] and rounded, so a prediction counts as
    positive once it rounds to 1. `K.epsilon()` in the denominator avoids a
    division by zero when nothing is predicted positive.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    flagged = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hits) / (K.sum(flagged) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score built from `precision_m` and `recall_m`.

    Returns 2 * P * R / (P + R + epsilon); the epsilon keeps the result
    finite when both precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

In [None]:
# Flatten each triplet's three feature vectors into a single row.
features_data_train = features_data_train.reshape(features_data_train.shape[0],-1)
print("training set reshaped")
features_data_val = features_data_val.reshape(features_data_val.shape[0],-1)
print("validation set reshaped")
                
# Scaling statistics are learned from the training rows only and then reused
# for validation (and later for the test set).
# Note: the messages say "standardized", but the scaler is a MinMaxScaler.
scaler = MinMaxScaler()
scaler.fit(features_data_train)
print("scaler fitted")
features_data_train = scaler.transform(features_data_train)
print("training set standardized")

features_data_val = scaler.transform(features_data_val)
print("validation set standardized")

In [None]:
# Placeholder with one entry per training row; `y_predict` is reassigned with
# the validation predictions in the evaluation cell further down.
y_predict = np.zeros(features_data_train.shape[0])
print(y_predict.shape)

In [None]:
batch_size = 100
epochs = 40

# Binary classifier on the flattened triplet features: two wide ReLU layers
# with dropout, a small bottleneck, and a sigmoid output.
# The model name is passed to the constructor: assigning `ANN.name` afterwards
# fails on Keras versions where `name` is a read-only property.
ANN = Sequential(name="ANN")

ANN.add(Dense(1000, activation='relu'))
ANN.add(Dropout(0.5))
ANN.add(Dense(1000, activation='relu'))
ANN.add(Dropout(0.3))
ANN.add(Dense(20, activation='relu'))
ANN.add(Dense(1, activation='sigmoid'))

optimizer = optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, amsgrad=False)
ANN.compile(loss='binary_crossentropy',
            optimizer=optimizer,
            metrics=['accuracy', precision_m, recall_m, f1_m])

# Stop once the validation loss has not improved for 3 consecutive epochs.
# The callback gets its own name so that it does not shadow the imported
# `EarlyStopping` class.
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',
                                               patience=3,
                                               verbose=0,
                                               mode='auto')

history = ANN.fit(features_data_train, data_train_label_Y,
                  batch_size=batch_size,
                  epochs=epochs,
                  shuffle=True,
                  verbose=1,
                  callbacks=[early_stopping],
                  validation_data=(features_data_val, data_val_label_Y))

# No input shape was declared, so the summary is printed after fit().
ANN.summary()

# The figures below are labelled "Test" but are computed on the validation split.
score = ANN.evaluate(features_data_val, data_val_label_Y, verbose=1)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
print('Test precision:', score[2])
print('Test recall:', score[3])
print('Test F1:', score[4])

In [None]:
def _plot_history_curve(history, metric, title, ylabel):
    """Plot the training and validation curves of one logged metric per epoch.

    `metric` is the key of the training series in `history.history`; the
    validation series is read from the same key prefixed with 'val_'.
    """
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()


# list all data in history
print(history.history.keys())

# Learning curves for accuracy, loss and F1.
_plot_history_curve(history, 'accuracy', 'model accuracy', 'accuracy')
_plot_history_curve(history, 'loss', 'model loss', 'loss')
_plot_history_curve(history, 'f1_m', 'model F1 score', 'F1')

# Predicted probabilities on the validation split.
y_predict = ANN.predict(features_data_val)
print(y_predict.shape)

# ROC curve and the area under it. The area is stored under its own name so
# that the `auc` function imported from sklearn.metrics is not overwritten.
# The earlier separate roc_auc_score(...) call was dropped because its result
# was never used.
fpr, tpr, thresholds = roc_curve(data_val_label_Y, y_predict)
roc_auc = sklearn.metrics.auc(fpr, tpr)

plt.figure(1)
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr, tpr, label='Keras (area = {:.3f})'.format(roc_auc))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.legend(loc='best')
plt.show()
AUC = roc_auc

print('AUC = {}'.format(AUC))
# F1 on hard decisions: probabilities are rounded to 0/1 first.
F1 = f1_score(data_val_label_Y, np.around(y_predict))
print('F1 score = {}'.format(F1))

In [None]:
# Show the training label vector.
print(data_train_label_Y)

In [None]:
# Reload the cached feature table for the test triplets and drop the
# references to the train/validation index tables.
features_db = np.loadtxt('features_db_InceptionResNetV2.csv', delimiter = ',', dtype = np.float32)
del data_train_index_X
del Training_set
del data_val_index_X
del Validation_set

In [None]:
# Test element extraction: read the test triplets and look up the feature
# vector of every image they reference.

Test_index = np.loadtxt('test_triplets.txt', delimiter = ' ').astype(int)
print(Test_index.shape)
print(Test_index[:4,:])

# Result layout: (triplet, position within triplet, feature).
Test_data = np.zeros([Test_index.shape[0], 3, features_db.shape[1]])

for slot in range(3):
    Test_data[:, slot, :] = features_db[Test_index[:, slot], :]

print(Test_data.shape)

In [None]:
# The test features have been gathered; drop the reference to the full table.
del features_db

In [None]:
# Flatten each test triplet into one row and apply the scaler that was fitted
# on the training rows (it is not refitted here).
Test_data = Test_data.reshape(Test_data.shape[0],-1)
print("test set reshaped")
Test_data = scaler.transform(Test_data)
print("test set standardized")

In [None]:
# Sigmoid outputs of the trained network for every test triplet.
test_predict = ANN.predict(Test_data)

In [None]:
# Round the predictions and write them to 'predict.txt' as integers ('%d').
np.savetxt('predict.txt', np.around(test_predict), delimiter='',fmt = '%d', comments = '') 

In [None]:
# Predictions have been written to disk; drop the references to the test arrays.
del test_predict
del Test_data

In [None]:
# Rebind the name to an empty list so the trained model is no longer referenced.
ANN = []