In [1]:
import tensorflow as tf
import pandas as pd
import h5py

# Only let TensorFlow's Python logger emit messages of severity ERROR or higher.
tf.get_logger().setLevel('ERROR')
# Run speedtest-cli to report this runtime's download/upload bandwidth.
# NOTE(review): this pipes an unpinned script from the repository's `master` branch
# straight into Python; consider pinning a commit or using a versioned package.
!curl -s https://raw.githubusercontent.com/sivel/speedtest-cli/master/speedtest.py | python -

Retrieving speedtest.net configuration...
Testing from Google Cloud (34.75.53.89)...
Retrieving speedtest.net server list...
Selecting best server based on ping...
Hosted by Farmers Telephone Cooperative, Inc. (Sumter, SC) [122.65 km]: 27.285 ms
Testing download speed................................................................................
Download: 978.49 Mbit/s
Testing upload speed......................................................................................................
Upload: 318.40 Mbit/s


# Import dataset

In [None]:
from google.colab import drive
import os

drive.mount('/content/GoogleDrive', force_remount=True)
path = '/content/GoogleDrive/My Drive/Vietnamese Foods'
os.chdir(path)
!ls

In [3]:
# Move dataset to /tmp cause reading files from Drive is very slow
!cp Dataset/vietnamese-foods-split.zip /tmp
!unzip -q /tmp/vietnamese-foods-split.zip -d /tmp

# Check that the GPU is working

In [4]:
# Ask TensorFlow to grow GPU memory on demand rather than reserving it all up
# front. This has to happen before the GPUs are initialised.
# Iterating instead of indexing [0] configures every visible GPU consistently
# and is a no-op when no GPU is present, so a missing GPU no longer surfaces
# here as an opaque IndexError.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

In [5]:
# Abort early unless TensorFlow reports the first GPU as its GPU device.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print('Found GPU at:', device_name)
else:
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


# Preparing data

In [6]:
# Dataset splits, extracted under /tmp by the unzip step.
_IMAGES_ROOT = '/tmp/Images'
TRAIN_PATH = _IMAGES_ROOT + '/Train'
VALIDATE_PATH = _IMAGES_ROOT + '/Validate'
TEST_PATH = _IMAGES_ROOT + '/Test'

# Folder (relative to the working directory) holding one sub-folder per model,
# and the checkpoint file name looked up inside each of them.
MODELS_PATH = 'Models'
BEST_MODEL = 'fine_tune_model_best.hdf5'

# Target size images are resized to, and the number of images per batch.
IMAGE_SIZE = (300, 300)
BATCH_SIZE = 128

In [7]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random augmentations applied to the training images only.
_augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Every split scales pixel values by 1/255; validation and test images are
# otherwise left untouched.
train_generator = ImageDataGenerator(rescale=1./255, **_augmentation_options)
validate_generator = ImageDataGenerator(rescale=1./255)
test_generator = ImageDataGenerator(rescale=1./255)

In [8]:
# Training batches keep flow_from_directory's default shuffling.
generated_train_data = train_generator.flow_from_directory(
    TRAIN_PATH,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)

# Evaluation splits: shuffle=False keeps batches in a fixed file order, so
# repeated evaluations see the same sequence and per-sample predictions can be
# matched with the iterator's labels. The test iterator now uses BATCH_SIZE
# like the other two instead of silently falling back to the library default.
generated_validate_data = validate_generator.flow_from_directory(
    VALIDATE_PATH,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False,
)
generated_test_data = test_generator.flow_from_directory(
    TEST_PATH,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

Found 17581 images belonging to 30 classes.
Found 2515 images belonging to 30 classes.
Found 5040 images belonging to 30 classes.


# Evaluation

In [46]:
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.metrics import TopKCategoricalAccuracy
from tqdm.notebook import tqdm
# Per-model evaluation results, keyed by model folder name; two separate dicts.
validate_results, test_results = {}, {}

In [48]:
# Evaluate every model folder under MODELS_PATH that contains a BEST_MODEL
# checkpoint. For each one, store the list returned by `model.evaluate`
# (presumably loss first, then the compiled metrics in order) under the folder
# name. Sorting makes the evaluation order independent of the filesystem.
for folder in tqdm(sorted(os.listdir(MODELS_PATH))):
    checkpoint_path = os.path.join(MODELS_PATH, folder, BEST_MODEL)
    # isfile() skips folders without a checkpoint and also stray plain files
    # in MODELS_PATH, which would make os.listdir() raise NotADirectoryError.
    if not os.path.isfile(checkpoint_path):
        continue

    print('\n========== Evaluate', folder, 'Model ==========')
    # Drop Keras state left over from the previously evaluated model so that
    # several large networks do not pile up in memory across iterations.
    tf.keras.backend.clear_session()
    model = load_model(checkpoint_path)
    # Re-compile so every model reports the same loss and metric set.
    model.compile(
        optimizer=SGD(learning_rate=1e-4, momentum=0.9),
        loss='categorical_crossentropy',
        metrics=[
            'accuracy',
            TopKCategoricalAccuracy(k=3, name='top_3_accuracy'),
            TopKCategoricalAccuracy(k=5, name='top_5_accuracy'),
        ],
    )
    print('Validate dataset:')
    validate_results[folder] = model.evaluate(generated_validate_data)
    print('Test dataset:')
    test_results[folder] = model.evaluate(generated_test_data)

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


Validate dataset:
Test dataset:

Validate dataset:
Test dataset:

Validate dataset:
Test dataset:

Validate dataset:
Test dataset:

Validate dataset:
Test dataset:



In [65]:
# One row per model; the first value of each result list is dropped and the
# remaining three become the named accuracy columns.
metric_columns = ['Accuracy', 'Top 3 Accuracy', 'Top 5 Accuracy']
validate_scores = pd.DataFrame.from_dict(validate_results, orient='index')
validate_report = validate_scores.iloc[:, 1:]
validate_report.columns = metric_columns
# Display the models ranked from best to worst accuracy.
validate_report.sort_values(by='Accuracy', ascending=False)

Unnamed: 0,Accuracy,Top 3 Accuracy,Top 5 Accuracy
ResNet152V2,0.778529,0.916103,0.958251
InceptionV3,0.747117,0.901789,0.951093
Xception,0.737177,0.901789,0.951491
InceptionResNetV2,0.724056,0.891451,0.943539
VGG19,0.702187,0.885089,0.934791


In [66]:
# One row per model; the first value of each result list is dropped and the
# remaining three become the named accuracy columns.
metric_columns = ['Accuracy', 'Top 3 Accuracy', 'Top 5 Accuracy']
test_scores = pd.DataFrame.from_dict(test_results, orient='index')
test_report = test_scores.iloc[:, 1:]
test_report.columns = metric_columns
# Display the models ranked from best to worst accuracy.
test_report.sort_values(by='Accuracy', ascending=False)

Unnamed: 0,Accuracy,Top 3 Accuracy,Top 5 Accuracy
ResNet152V2,0.775397,0.92123,0.960714
Xception,0.730357,0.900198,0.947222
InceptionResNetV2,0.729167,0.901587,0.951389
InceptionV3,0.727183,0.895833,0.944841
VGG19,0.704762,0.879167,0.935119
