# Food Classification

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import pathlib
import os
import fnmatch
import cv2
import numpy as np
import pandas as pd
import string
import time
import json
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import IPython
# make sure we use tensorflow 2.0
import tensorflow as tf
# print(tf.__version__)

# import padding library
from tensorflow.keras.preprocessing.sequence import pad_sequences

# import our model, different layers and activation function 
from tensorflow.keras.layers import Dense, LSTM, Reshape, BatchNormalization, Input, Conv2D, MaxPool2D, Lambda, Bidirectional, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.activations import relu, sigmoid, softmax
import tensorflow.keras.backend as K
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import CSVLogger, TensorBoard, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Conv2D,MaxPool2D,Activation,GlobalAveragePooling2D,BatchNormalization,Flatten,Dense,Dropout


In [2]:
print(tf.__version__)
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
print(tf.test.gpu_device_name())
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())


2.2.0
Num GPUs Available:  1
/device:GPU:0
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 6721308252498532172
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 16787196714261914756
physical_device_desc: "device: XLA_CPU device"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 7245153349148330375
physical_device_desc: "device: XLA_GPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 15287485760
locality {
  bus_id: 1
  links {
  }
}
incarnation: 14382107032506434576
physical_device_desc: "device: 0, name: Tesla V100-SXM2-16GB, pci bus id: 0000:00:04.0, compute capability: 7.0"
]


In [3]:
# import keras.backend as K

# dtype='float16'
# K.set_floatx(dtype)

# # default is 1e-7 which is too small for float16.  Without adjusting the epsilon, we will get NaN predictions because of divide by zero problems
# K.set_epsilon(1e-4)


In [4]:
!pip install -q h5py
!pip install memory_profiler
%load_ext memory_profiler




In [5]:
%memit

peak memory: 522.14 MiB, increment: 0.07 MiB


In [6]:
root_path = os.getcwd()
print(root_path)

/home/jupyter/FoodDetector/FoodClassification


In [7]:
# Draw result plot
def plot_history(history, key='loss'):
  plt.figure(figsize=(12,8))
  val = plt.plot(history.epoch, history.history['val_'+key],
                  '--', label=key.title() +' Val')
  plt.plot(history.epoch, history.history[key], color=val[0].get_color(),
              label=key.title() + ' Train')

  plt.xlabel('Epochs')
  plt.ylabel(key.replace('_',' ').title())
  plt.legend()

  plt.xlim([0,max(history.epoch)])

## Preprocessing

In [8]:
labels_paths_path = os.path.join(root_path,'datasets/UECFOOD256crop_food_labels_paths.csv')
print(labels_paths_path)

/home/jupyter/FoodDetector/FoodClassification/datasets/UECFOOD256crop_food_labels_paths.csv


In [9]:
df = pd.read_csv(labels_paths_path)
df.shape[0]

31395

In [10]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31395 entries, 0 to 31394
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Label   31395 non-null  object
 1   Path    31395 non-null  object
dtypes: object(2)
memory usage: 490.7+ KB


In [11]:
df.head(5)

Unnamed: 0,Label,Path
0,rice,/home/jupyter/FoodDetector/datasets/UECFOOD256...
1,rice,/home/jupyter/FoodDetector/datasets/UECFOOD256...
2,rice,/home/jupyter/FoodDetector/datasets/UECFOOD256...
3,rice,/home/jupyter/FoodDetector/datasets/UECFOOD256...
4,rice,/home/jupyter/FoodDetector/datasets/UECFOOD256...


In [12]:
df['Label'].value_counts()

miso soup                  728
rice                       620
ramen noodle               353
green salad                342
beef curry                 246
                          ... 
green curry                100
Crispy Noodles             100
noodles with fish curry    100
chop suey                  100
Pork with lemon            100
Name: Label, Length: 255, dtype: int64

In [13]:
df['Label'].unique()

array(['rice', 'eels on rice', 'pilaf', "chicken-'n'-egg on rice",
       'pork cutlet on rice', 'beef curry', 'sushi', 'chicken rice',
       'fried rice', 'tempura bowl', 'bibimbap', 'toast', 'croissant',
       'roll bread', 'raisin bread', 'chip butty', 'hamburger', 'pizza',
       'sandwiches', 'udon noodle', 'tempura udon', 'soba noodle',
       'ramen noodle', 'beef noodle', 'tensin noodle', 'fried noodle',
       'spaghetti', 'Japanese-style pancake', 'takoyaki', 'gratin',
       'sauteed vegetables', 'croquette', 'grilled eggplant',
       'sauteed spinach', 'vegetable tempura', 'miso soup', 'potage',
       'sausage', 'oden', 'omelet', 'ganmodoki', 'jiaozi', 'stew',
       'teriyaki grilled fish', 'fried fish', 'grilled salmon',
       'salmon meuniere', 'sashimi', 'grilled pacific saury', 'sukiyaki',
       'sweet and sour pork', 'lightly roasted fish',
       'steamed egg hotchpotch', 'tempura', 'fried chicken',
       'sirloin cutlet', 'nanbanzuke', 'boiled fish',
       '

In [14]:
label_to_index = dict((name,index) for index,name in enumerate(df['Label'].unique()))

In [15]:
df['Category'] = df['Label'].apply(lambda x: label_to_index[x])

In [16]:
df.sample(5)

Unnamed: 0,Label,Path,Category
23437,Thai papaya salad,/home/jupyter/FoodDetector/datasets/UECFOOD256...,182
21887,xiao long bao,/home/jupyter/FoodDetector/datasets/UECFOOD256...,168
31174,eight treasure rice,/home/jupyter/FoodDetector/datasets/UECFOOD256...,253
2340,croissant,/home/jupyter/FoodDetector/datasets/UECFOOD256...,12
30481,salt & pepper fried shrimp with shell,/home/jupyter/FoodDetector/datasets/UECFOOD256...,247


In [17]:
df['Label'].value_counts()

miso soup                  728
rice                       620
ramen noodle               353
green salad                342
beef curry                 246
                          ... 
green curry                100
Crispy Noodles             100
noodles with fish curry    100
chop suey                  100
Pork with lemon            100
Name: Label, Length: 255, dtype: int64

In [18]:
from sklearn.model_selection import train_test_split
# rest, samples = train_test_split(df, test_size=0.03, 
#                                random_state=150, 
#                                stratify=df['Label'])
train, test = train_test_split(df, test_size=0.2, 
                               random_state=150)
# train.sample(5)

In [19]:
%memit

peak memory: 555.01 MiB, increment: 0.02 MiB


In [20]:
# Split train, test and validation set
BATCH_SIZE = 32

TRAIN_SIZE = train.shape[0]
TEST_SIZE = test.shape[0]
NUM_CLASSES = df['Label'].nunique()
AUTOTUNE = tf.data.experimental.AUTOTUNE

MOBILE_NET_IMG_SIZE = 192
XCEPTION_IMG_SIZE = 299
EfficientNetB7_IMAGE_SIZE = 256
IMAGE_SIZE = EfficientNetB7_IMAGE_SIZE


STEPS_PER_TRAIN_EPOCH = tf.math.ceil(train.shape[0]/BATCH_SIZE)
STEPS_PER_TEST_EPOCH = tf.math.ceil(test.shape[0]/BATCH_SIZE)


# train_dataset = train_dataset.batch(BATCH_SIZE, drop_remainder=True)
# test_dataset = test_dataset.batch(BATCH_SIZE, drop_remainder=True)

In [21]:
def preprocess(path):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [IMAGE_SIZE, IMAGE_SIZE])
    image /= 255.0

    # Use `convert_image_dtype` to convert to floats in the [0,1] range.
    image = tf.image.convert_image_dtype(image, tf.float32)    
    image = (image*2) - 1  # normalize to [-1,1] range
    image = tf.image.per_image_standardization(image)
    
    return image

def augmentation(image, label):
#     image = tf.image.random_contrast(image, lower=0.0, upper=1.0,seed=115)    
    image = tf.image.random_brightness(image, .5)
    image = tf.image.random_contrast(image, lower=0.0, upper=1.0)
    
    image = tf.image.random_flip_left_right(image,seed=115)
    image = tf.image.random_flip_up_down(image,seed=115)
    return image, label

def load_and_preprocess_from_path_and_label(path,label):
    return preprocess(path), label

def prepare_for_training(ds, cache=False,shuffle_buffer_size=100,augment=False):
    if cache:
        if isinstance(cache,str):
            ds = ds.cache(cache)
        else: 
            ds = ds.cache()
    if shuffle_buffer_size > 0:
        ds = ds.shuffle(buffer_size=shuffle_buffer_size)    
    # repeat forever
    ds = ds.repeat()
    if augment:
        ds.map(augmentation, num_parallel_calls=AUTOTUNE)
    ds = ds.batch(BATCH_SIZE)
    
    # `prefetch` lets the dataset fetch batches in the background while the model
    # is training.
    ds = ds.prefetch(buffer_size=AUTOTUNE)
    return ds

In [22]:
train_dataset = tf.data.Dataset.from_tensor_slices((train['Path'],train['Category']))
test_dataset = tf.data.Dataset.from_tensor_slices((test['Path'],test['Category']))

In [23]:
for feat, targ in train_dataset.take(5):
  print ('Features: {}, Target: {}'.format(feat, targ))


Features: b'/home/jupyter/FoodDetector/datasets/UECFOOD256crop/UECFOOD256/218/211024.jpg', Target: 216
Features: b'/home/jupyter/FoodDetector/datasets/UECFOOD256crop/UECFOOD256/116/11312.jpg', Target: 114
Features: b'/home/jupyter/FoodDetector/datasets/UECFOOD256crop/UECFOOD256/87/97.jpg', Target: 85
Features: b'/home/jupyter/FoodDetector/datasets/UECFOOD256crop/UECFOOD256/164/47899.jpg', Target: 162
Features: b'/home/jupyter/FoodDetector/datasets/UECFOOD256crop/UECFOOD256/49/4976.jpg', Target: 48


In [24]:
%memit

peak memory: 1718.63 MiB, increment: 0.01 MiB


In [25]:
train_dataset = train_dataset.map(lambda path,label: load_and_preprocess_from_path_and_label(path,label),
                       num_parallel_calls=AUTOTUNE)
test_dataset = test_dataset.map(lambda path,label: load_and_preprocess_from_path_and_label(path,label),                      
                       num_parallel_calls=AUTOTUNE)

In [26]:
train_dataset = prepare_for_training(train_dataset,shuffle_buffer_size=512,augment=True,cache=True)
test_dataset = prepare_for_training(test_dataset,shuffle_buffer_size=512,cache=True)


In [27]:
print(train_dataset)
print(test_dataset)

<PrefetchDataset shapes: ((None, 256, 256, 3), (None,)), types: (tf.float32, tf.int64)>
<PrefetchDataset shapes: ((None, 256, 256, 3), (None,)), types: (tf.float32, tf.int64)>


In [28]:
%memit

peak memory: 1721.67 MiB, increment: 0.00 MiB


# Build model

In [29]:
!pip install efficientnet
import efficientnet.tfkeras as efn



In [30]:
import tensorflow.keras as keras
class FocalLoss(keras.losses.Loss):
    def __init__(self, gamma=2., alpha=4.,
                 reduction=keras.losses.Reduction.AUTO, name='focal_loss'):
        """Focal loss for multi-classification
        FL(p_t)=-alpha(1-p_t)^{gamma}ln(p_t)
        Notice: y_pred is probability after softmax
        gradient is d(Fl)/d(p_t) not d(Fl)/d(x) as described in paper
        d(Fl)/d(p_t) * [p_t(1-p_t)] = d(Fl)/d(x)
        Focal Loss for Dense Object Detection
        https://arxiv.org/abs/1708.02002

        Keyword Arguments:
            gamma {float} -- (default: {2.0})
            alpha {float} -- (default: {4.0})
        """
        super(FocalLoss, self).__init__(reduction=reduction,
                                        name=name)
        self.gamma = float(gamma)
        self.alpha = float(alpha)

    def call(self, y_true, y_pred):
        """
        Arguments:
            y_true {tensor} -- ground truth labels, shape of [batch_size, num_cls]
            y_pred {tensor} -- model's output, shape of [batch_size, num_cls]

        Returns:
            [tensor] -- loss.
        """
        epsilon = 1.e-9
        y_true = tf.convert_to_tensor(y_true, tf.float32)
        y_pred = tf.convert_to_tensor(y_pred, tf.float32)

        model_out = tf.add(y_pred, epsilon)
        ce = tf.multiply(y_true, -tf.math.log(model_out))
        weight = tf.multiply(y_true, tf.pow(
            tf.subtract(1., model_out), self.gamma))
        fl = tf.multiply(self.alpha, tf.multiply(weight, ce))
        reduced_fl = tf.reduce_max(fl, axis=1)
        return tf.reduce_mean(reduced_fl)



In [31]:
def create_model(load_from_path=None):
    if load_from_path == None:
        base_model =  efn.EfficientNetB7(weights='imagenet', 
                                         include_top=False, 
                                         input_shape=(IMAGE_SIZE,IMAGE_SIZE, 3))
        base_model.trainable= True
        
        print("Number of layers in the base model:", len(base_model.layers))
        fine_tune_at = int(0.5 * len(base_model.layers))

        for layer in base_model.layers[:fine_tune_at]:
            layer.trainable = False
        x = base_model.output
        x1 = tf.keras.layers.GlobalMaxPooling2D()(x)
        x2 = tf.keras.layers.GlobalAveragePooling2D()(x)
        x = tf.keras.layers.Concatenate()([x1, x2])
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Dropout(0.5)(x)
        x = tf.keras.layers.Dense(512, activation='relu')(x)
        y = tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')(x)

        model = tf.keras.Model(inputs=base_model.input, outputs=y)
        model.compile(loss='sparse_categorical_crossentropy',
                      optimizer='nadam',
                      metrics=["accuracy"])
    else:
        model = tf.keras.models.load_model(load_from_path)
    return model

In [32]:
checkpoint_path = "training/cp.ckpt"
model = create_model()

model.load_weights(checkpoint_path)
# model.summary()


Number of layers in the base model: 806


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7ff96e62a610>

In [33]:
class PrintLR(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        print('\nLearning rate for epoch {} is {}'.format(epoch + 1,
                                                          model.optimizer.lr.numpy()))


In [34]:
%memit

peak memory: 2142.43 MiB, increment: 0.00 MiB


In [None]:
%%time 
max_epochs = 50
earlystop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1) # Create EarlyStopping Callback
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                 factor=0.2, # Create ReduceLROnPlateau Callback
                                                 patience=1, 
                                                 mode='min',
                                                 min_lr=1e-13)

# Create a callback that saves the model's weights
# by default it saves the weights every epoch
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 save_best_only=True,
                                                 verbose=1)
print_lr = PrintLR()
history = model.fit(train_dataset, epochs=max_epochs,     
                    validation_steps=STEPS_PER_TEST_EPOCH,
                    steps_per_epoch=STEPS_PER_TRAIN_EPOCH,
                    validation_data=test_dataset,
                    callbacks=[cp_callback,reduce_lr,earlystop,print_lr], # Add callback to training process
                    verbose=1)


Epoch 1/50
Epoch 00001: val_loss improved from inf to 1.32716, saving model to training/cp.ckpt

Learning rate for epoch 1 is 0.0010000000474974513
Epoch 2/50
Epoch 00002: val_loss did not improve from 1.32716

Learning rate for epoch 2 is 0.00020000000949949026
Epoch 3/50
Epoch 00003: val_loss improved from 1.32716 to 1.16738, saving model to training/cp.ckpt

Learning rate for epoch 3 is 0.00020000000949949026
Epoch 4/50
Epoch 00004: val_loss did not improve from 1.16738

Learning rate for epoch 4 is 4.0000002627493814e-05
Epoch 5/50
Epoch 00005: val_loss did not improve from 1.16738

Learning rate for epoch 5 is 8.000000889296643e-06
Epoch 6/50
Epoch 00006: val_loss did not improve from 1.16738

Learning rate for epoch 6 is 1.6000001323845936e-06
Epoch 7/50
Epoch 00007: val_loss did not improve from 1.16738

Learning rate for epoch 7 is 3.200000264769187e-07
Epoch 8/50
Epoch 00008: val_loss did not improve from 1.16738

Learning rate for epoch 8 is 6.400000529538374e-08
Epoch 9/50

In [None]:
model.save('saved_model/EfficientNetB7_model.h5')