In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session


In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import os.path

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.optimizers import Adam

from sklearn.metrics import confusion_matrix, classification_report # analyzing model's performance

# Creating File DataFrame

In [None]:
# Root of the Kaggle input mount (copy the dataset path from the right-hand sidebar).
# Every *.jpg found underneath it is treated as one sample.
image_dir = Path("/kaggle/input/")

# Recursive glob: "**/*.jpg" matches files ending in .jpg at any depth below image_dir.
filepaths = list(image_dir.glob(r'**/*.jpg'))

# The class name is the name of the directory that directly contains the image,
# i.e. <root>/.../<class>/<file>.jpg -> <class>.
labels = [path.parent.name for path in filepaths]

# Convert both lists to named Series and place them side by side in one dataframe.
filepaths = pd.Series(filepaths, name='Filepath').astype(str)  # IMAGES (Path objects -> str)
labels = pd.Series(labels, name='Label')                       # LABELS (already strings)

images = pd.concat([filepaths, labels], axis=1)  # axis=1 means side by side
images

In [None]:
# Per the original note, the dataset holds about 1000 images for each of 101 foods,
# which is slow to train on. Keep a random 100 images per class instead.
# random_state=1 makes every draw reproducible.
category_samples = [
    images.loc[images['Label'] == category].sample(100, random_state=1)
    for category in images['Label'].unique()
]

# Stack the per-class samples on top of each other (axis=0), shuffle all rows
# (frac=1.0 keeps every row), then renumber the index; drop=True discards the
# shuffled index instead of keeping it as a new column.
stacked_samples = pd.concat(category_samples, axis=0)
image_df = stacked_samples.sample(frac=1.0, random_state=1).reset_index(drop=True)

image_df['Label'].value_counts()

# Train-Test Split

In [None]:
# Keep 70% of the sampled images for training and hold out the rest for testing;
# the fixed random_state makes the shuffled split reproducible.
train_df, test_df = train_test_split(
    image_df,
    train_size=0.7,
    shuffle=True,
    random_state=1,
)

# Creating Generators

Generators load images one batch at a time, so the whole dataset never has to fit in memory. With a batch size of 32, each step loads 32 images, trains on them, and then frees that memory before the next batch is loaded.

In [None]:
# resnet50.preprocess_input takes a tensor or NumPy array encoding a batch of images,
# converts them from RGB to BGR, and zero-centres each colour channel with respect to
# the ImageNet dataset, without scaling.
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input,
    validation_split=0.2,  # fraction reserved for the "validation" subset
    # data augmentation
    rotation_range=5,
    # Further augmentation options, currently disabled:
    # width_shift_range=0.1,
    # height_shift_range=0.1,
    # zoom_range=0.1,
)

# The test generator applies the same preprocessing, with no augmentation and no validation split.
test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input
)

In [None]:
# flow_from_dataframe builds an iterator that reads the images listed in a dataframe
# batch by batch. The resulting objects only describe how to pull the files; they are
# passed to fit / evaluate / predict later.
#
# Options shared by all three iterators:
#   x_col / y_col -> dataframe columns holding the image path and the class name
#   color_mode    -> the images are RGB
#   class_mode    -> "categorical" for a multi-class classification task
#   target_size   -> every image is resized to 224x224
#   batch_size    -> 32 images per batch
shared_flow_options = dict(
    x_col="Filepath",
    y_col="Label",
    color_mode="rgb",
    class_mode="categorical",
    target_size=(224, 224),
    batch_size=32,
)

# Training and validation iterators both read train_df through train_generator.
# `subset` is only available because that generator defines a validation_split (0.2):
# "training" selects the 80% training part, "validation" the remaining 20%.
# shuffle=True reshuffles after each epoch; seed=42 keeps the results reproducible.
train_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    shuffle=True,
    seed=42,
    subset="training",
    **shared_flow_options,
)

val_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    shuffle=True,
    seed=42,
    subset="validation",
    **shared_flow_options,
)

# Test iterator: no shuffling, no seed, no subset.
test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    shuffle=False,
    **shared_flow_options,
)

# Modeling

In [None]:
# ResNet50 pre-trained on ImageNet, used here as a feature extractor.
#   input_shape -> (height, width, channels); must match the target_size=(224, 224)
#                  that the data iterators resize images to (3 is for RGB)
#   include_top -> False drops the final classification layer the original model was
#                  trained with (ImageNet has 1000 classes) so our own food
#                  classification layers can be attached instead
#   pooling     -> 'avg' averages over the spatial dimensions, so the model's output
#                  is one 1D feature vector per image
pretrained_model = tf.keras.applications.ResNet50(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet',
    pooling='avg'
)

# IMPORTANT: freeze the backbone so training does not touch the original ImageNet weights.
# Using a pre-trained model this way is transfer learning: the convolutional layers
# (everything except the dropped classification top) act as a feature extractor
# for the new task.
pretrained_model.trainable = False

In [None]:
# Build the classification head on top of the pre-trained backbone.
inputs = pretrained_model.input            # the backbone's input layer is the model input
backbone_features = pretrained_model.output

# two fully-connected ReLU layers with 120 units each
x = tf.keras.layers.Dense(120, activation='relu')(backbone_features)
x = tf.keras.layers.Dense(120, activation='relu')(x)

# classification layer: softmax over 101 classes, so the 101 probabilities sum to 1
outputs = tf.keras.layers.Dense(101, activation="softmax")(x)

model = tf.keras.Model(inputs, outputs)

# The backbone is frozen, so only the three Dense layers added here have trainable
# parameters. Uncomment to inspect the architecture:
# print(model.summary())

# Training

Turn on GPU acceleration on Kaggle for faster training.

In [None]:
# Loss choice: the image iterators encode each class as a one-hot vector
# (class_mode="categorical"), so categorical_crossentropy is used. The sparse variant
# (sparse_categorical_crossentropy) is for labels passed as plain integers.
optimizer = Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Train for a fixed 30 epochs, validating on val_images.
# Early stopping is currently disabled. To stop once validation loss has not improved
# for 3 consecutive epochs and restore the weights of the best epoch, pass:
#     callbacks=[
#         tf.keras.callbacks.EarlyStopping(
#             monitor="val_loss",
#             patience=3,
#             restore_best_weights=True
#         )
#     ]
history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=30,
)

# Results

In [None]:
# evaluate returns the loss followed by the compiled metrics; with metrics=["accuracy"]
# index 1 is the accuracy on the test iterator.
results = model.evaluate(test_images)
test_accuracy_pct = results[1] * 100
print("Test accuracy: {:.2f}%".format(test_accuracy_pct))

In [None]:
# Predicted class probabilities for every test image; taking the arg-max along axis 1
# gives the index of the most probable class for each image.
class_probabilities = model.predict(test_images)
predictions = class_probabilities.argmax(axis=1)

# Actual class indices, as recorded by the test iterator.
true_labels = test_images.labels

# sklearn: actual labels vs predicted labels
cm = confusion_matrix(true_labels, predictions)

# sklearn: how well each class was predicted; rows are named from the iterator's
# class -> index mapping
clr = classification_report(
    true_labels,
    predictions,
    target_names=test_images.class_indices,
)

In [None]:
# test_images.class_indices maps class name -> index; its keys are used as tick labels.
class_names = list(test_images.class_indices)
# One tick per class, offset by 0.5 so each tick sits in the middle of its heatmap cell.
tick_positions = np.arange(len(class_names)) + 0.5

plt.figure(figsize=(30, 30))
# fmt='g' shows the counts as plain integers instead of scientific notation
# cbar=False turns off the colour bar
sns.heatmap(cm, annot=True, fmt='g', vmin=0, cmap="Blues", cbar=False)
plt.xticks(ticks=tick_positions, labels=class_names, rotation=90)
plt.yticks(ticks=tick_positions, labels=class_names, rotation=0)
# xlabel / ylabel must be called. Assigning to them (plt.xlabel = "...") would replace
# the pyplot functions with strings and leave the axes unlabelled.
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

In [None]:
# Print the per-class metrics (clr) under a short heading.
report_heading = "Classification Report:\n-----------------------\n"
print(report_heading, clr)