## Import Necessary Packages

In [None]:
import os
import glob
import joblib
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras.applications.vgg16 import preprocess_input, VGG16
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input, Dropout, Flatten, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD
from sklearn.metrics import classification_report

from annoy import AnnoyIndex

## Step 01 : Data Preparation

In [None]:
# Names of the food categories; the classifier head is sized from len(CLASSES).
CLASSES = [
    "Bread",
    "Dairy product",
    "Dessert",
    "Egg",
    "Fried food",
    "Meat",
    "Noodles-Pasta",
    "Rice",
    "Seafood",
    "Soup",
    "Vegetable-Fruit",
]

# Dataset root, followed by one recursive glob pattern per split.
dataDir = "//kaggle//input//food11-image-dataset//"
trainDir = "//kaggle//input//food11-image-dataset//training//**"
testDir = "//kaggle//input//food11-image-dataset//evaluation//**"
valDir = "//kaggle//input//food11-image-dataset//validation//**"

# Input shape (height, width, channels) used for the network and the generators.
img_shape = (224, 224, 3)

# Number of regular files under each split (directories matched by the
# recursive pattern are not counted).
totalTrain = sum(1 for entry in glob.iglob(trainDir, recursive=True) if os.path.isfile(entry))
totalVal = sum(1 for entry in glob.iglob(valDir, recursive=True) if os.path.isfile(entry))
totalTest = sum(1 for entry in glob.iglob(testDir, recursive=True) if os.path.isfile(entry))

## Step 02 : Data Preprocessing and Augmentation

In [None]:
# Define the Data Generators.
#
# Every image is passed through VGG16's own `preprocess_input`, the same
# function `preprocess_image` applies during feature extraction, so the
# network sees identically prepared pixels in training, evaluation and
# extraction.
#
# NOTE: assigning `ImageDataGenerator.mean` by hand is ignored unless the
# generator is built with `featurewise_center=True`, which is why the
# preprocessing is supplied through `preprocessing_function` instead.
trainAug = ImageDataGenerator(
    rotation_range=30,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest",
    preprocessing_function=preprocess_input,
)

# Validation and test images are only preprocessed, never augmented.
valAug = ImageDataGenerator(preprocessing_function=preprocess_input)


# Initialize Data Generators
trainGen = trainAug.flow_from_directory(
    directory=dataDir + "training",
    class_mode="categorical",
    target_size=img_shape[0:2],
    color_mode="rgb",
    shuffle=True,
    batch_size=32)

# shuffle=False keeps the batch order aligned with `valGen.classes`.
valGen = valAug.flow_from_directory(
    directory=dataDir + "validation",
    class_mode="categorical",
    target_size=img_shape[0:2],
    color_mode="rgb",
    shuffle=False,
    batch_size=32)

# shuffle=False keeps predictions aligned with `testGen.classes`.
testGen = valAug.flow_from_directory(
    directory=dataDir + "evaluation",
    class_mode="categorical",
    target_size=img_shape[0:2],
    color_mode="rgb",
    shuffle=False,
    batch_size=32)

## Step 03 : Model Training using Transfer Learning

### Part 01 : Model Warm Up

In [None]:
def create_model(img_shape, base):
    """Attach a new classification head to `base` and compile the result.

    The head is Flatten -> Dense(512, relu) -> Dropout(0.5) ->
    Dense(len(CLASSES), softmax), wired onto ``base.output``.

    Args:
        img_shape: Not used by this function; retained so existing callers
            keep working.
        base: Model exposing ``input`` and ``output`` tensors.

    Returns:
        The compiled model (categorical cross-entropy loss, SGD with
        learning rate 1e-4 and momentum 0.9, accuracy metric).
    """
    headModel = base.output
    headModel = Flatten(name="flatten")(headModel)
    headModel = Dense(512, activation="relu")(headModel)
    headModel = Dropout(0.5)(headModel)
    headModel = Dense(len(CLASSES), activation="softmax")(headModel)
    model = Model(inputs=base.input, outputs=headModel)
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases reject.
    opt = SGD(learning_rate=1e-4, momentum=0.9)
    model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
    return model

def plot_history(N, title):
    """Draw the loss and accuracy curves stored in a training history.

    Args:
        N: Object whose ``history`` mapping holds the "loss", "val_loss",
            "accuracy" and "val_accuracy" series.
        title: Title shown above the figure.
    """
    # (history key, keyword arguments forwarded to plt.plot), in draw order.
    curves = (
        ("loss", {"label": "training loss", "color": "b", "marker": "x"}),
        ("val_loss", {"label": "validation loss", "color": "g", "marker": "x"}),
        ("accuracy", {"label": "training accuracy", "color": "lightgrey"}),
        ("val_accuracy", {"label": "validation accuracy", "color": "r"}),
    )

    plt.figure()
    for key, style in curves:
        plt.plot(N.history[key], **style)
    plt.xlabel("epochs")
    plt.ylabel("accuracy/loss")
    plt.legend()
    plt.title(title)
    plt.show()

In [None]:
# ImageNet-pretrained VGG16 without its fully connected top; pooling="max"
# applies global max pooling to the last convolutional output.
baseModel = VGG16(weights="imagenet", include_top=False, input_tensor=Input(shape=img_shape), pooling="max")

# Freeze the whole base so only the new head is trained during warm-up.
for layer in baseModel.layers:
    layer.trainable = False

model = create_model(img_shape, baseModel)

# `Model.fit` accepts generators directly; `fit_generator` is deprecated
# and removed in newer releases.
H = model.fit(
    trainGen,
    steps_per_epoch=totalTrain // 32,
    validation_data=valGen,
    validation_steps=totalVal // 32,
    epochs=50)

In [None]:
# Persist the warm-up model's weights (path is relative to the working directory).
warmupWeightsPath = "warmup_weights.h5"
model.save_weights(warmupWeightsPath)

In [None]:
plot_history(H, "History of VGG16 Model : Warm Up")
testGen.reset()

# len(testGen) is the exact number of batches needed to cover the test set
# once, so the predictions line up one-to-one with `testGen.classes`
# (a hand-computed `count // 32 + 1` overshoots when the count is a
# multiple of the batch size). `predict` replaces the deprecated
# `predict_generator`.
test_history = model.predict(
    testGen,
    steps=len(testGen)
)

# Index of the highest-scoring class for every test image.
predicted_class_indices = np.argmax(test_history, axis=1)

report = classification_report(
    testGen.classes,
    predicted_class_indices,
    target_names=list(testGen.class_indices.keys()))

print(report)

### Part 02 : Adjust Model Original Weights

In [None]:
# Reset Data Generators
trainGen.reset()
valGen.reset()

# Unfreeze the tail of the base network (presumably VGG16's last
# convolutional block — confirm the index against baseModel.summary()).
for layer in baseModel.layers[15:]:
    layer.trainable = True

# Recompile the existing model so the new `trainable` flags take effect.
# The model is deliberately NOT rebuilt with create_model(): that would
# replace the head trained during warm-up with freshly initialised layers.
model.compile(
    loss="categorical_crossentropy",
    optimizer=SGD(learning_rate=1e-4, momentum=0.9),
    metrics=["accuracy"])

# `Model.fit` accepts generators directly; `fit_generator` is deprecated
# and removed in newer releases.
H = model.fit(
    trainGen,
    steps_per_epoch=totalTrain // 32,
    validation_data=valGen,
    validation_steps=totalVal // 32,
    epochs=20)

In [None]:
plot_history(H, "History of VGG16 Model : Adjusting Weights")
testGen.reset()

# len(testGen) is the exact number of batches needed to cover the test set
# once, so the predictions line up one-to-one with `testGen.classes`
# (a hand-computed `count // 32 + 1` overshoots when the count is a
# multiple of the batch size). `predict` replaces the deprecated
# `predict_generator`.
test_history = model.predict(
    testGen,
    steps=len(testGen)
)

# Index of the highest-scoring class for every test image.
predicted_class_indices = np.argmax(test_history, axis=1)

report = classification_report(
    testGen.classes,
    predicted_class_indices,
    target_names=list(testGen.class_indices.keys()))

print(report)

In [None]:
# Persist the fine-tuned model's weights (path is relative to the working directory).
adjustedWeightsPath = "adjust_weights.h5"
model.save_weights(adjustedWeightsPath)

## Step 04 : Feature Extraction

In [None]:
# Output file name for each category's embeddings; entries are consumed by
# position, so the order matters.
FEATURES = [
    "bread_features.joblib",
    "dairy_features.joblib",
    "dessert_features.joblib",
    "egg_features.joblib",
    "friedfood_features.joblib",
    "meat_features.joblib",
    "pasta_features.joblib",
    "rice_features.joblib",
    "seafood_features.joblib",
    "soup_features.joblib",
    "fruit_features.joblib",
]

def preprocess_image(path, shape):
    """Load one image and prepare it as a single-item batch for VGG16.

    Args:
        path: Location of the image file.
        shape: Desired image size. Only the first two entries
            (height, width) are used, so both ``(h, w)`` and ``(h, w, c)``
            are accepted; ``None`` keeps the image's original size.

    Returns:
        Array with a leading batch axis of length 1, passed through
        VGG16's ``preprocess_input``.
    """
    # load_img documents target_size as (height, width); drop any channel entry.
    target_size = None if shape is None else tuple(shape[:2])
    image = load_img(path, target_size=target_size)
    image = img_to_array(image)
    # Add the batch axis the model expects.
    image = np.expand_dims(image, axis=0)
    image = preprocess_input(image)
    return image

def extractor(path, model, shape):
    """Run every entry of a directory through `model` and collect the outputs.

    Args:
        path: Directory whose entries are each loaded as an image.
        model: Object providing a ``predict`` method.
        shape: Forwarded unchanged to ``preprocess_image``.

    Returns:
        Tuple ``(names, feats)`` of NumPy arrays: the entry names in
        ``os.listdir`` order and, for each, its flattened prediction.
    """
    file_names = os.listdir(path)
    vectors = []
    for file_name in file_names:
        batch = preprocess_image(os.path.join(path, file_name), shape)
        prediction = np.array(model.predict(batch))
        vectors.append(prediction.flatten())

    return np.array(file_names), np.array(vectors)


In [None]:
dataDir = "//kaggle//input//food11cbir//food11CBIR//"
embeddingsDir = "//kaggle//working//embeddings"

# joblib.dump does not create missing directories.
os.makedirs(embeddingsDir, exist_ok=True)

# os.listdir returns entries in arbitrary order; sorting makes the pairing
# of folder and FEATURES file name deterministic.
# NOTE(review): assumes the sorted folder names follow the same order as
# FEATURES (true if the folders are named like CLASSES) — confirm against
# the dataset.
for i, folder in enumerate(sorted(os.listdir(dataDir))):
    # (names, features) pair for every image in this category folder.
    embedding = extractor(os.path.join(dataDir, folder), baseModel, img_shape)
    joblib.dump(embedding, os.path.join(embeddingsDir, FEATURES[i]), compress=True)

## Step 05 : Approximate Nearest Neighbor Search

In [None]:
# Output file name of the Annoy index for each category, in the same order
# as FEATURES.
INDEXES = ["breadIndex.ann",  "dairyIndex.ann",     "dessertIndex.ann",
           "eggIndex.ann",    "friedfoodIndex.ann", "meatIndex.ann",
           "pastaIndex.ann",  "riceIndex.ann",      "seafoodIndex.ann",
           "soupIndex.ann",   "fruitIndex.ann"]

indexDir = "annoy_indexes"

# AnnoyIndex.save does not create missing directories.
os.makedirs(indexDir, exist_ok=True)

# Pair every embeddings file with its index file by position.
for featureFile, indexFile in zip(FEATURES, INDEXES):
    # The first argument must equal the length of each stored embedding
    # (presumably 512 for the max-pooled VGG16 base — confirm).
    index = AnnoyIndex(512, "euclidean")
    names, embs = joblib.load(os.path.join("//kaggle//input//embeddings", featureFile))
    # Item id j corresponds to names[j], so search results can be mapped
    # back to file names.
    for j, emb in enumerate(embs):
        index.add_item(j, emb)

    index.build(7)  # number of trees in the forest
    index.save(os.path.join(indexDir, indexFile), prefault=False)
