In [1]:
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D,Dense,Dropout,BatchNormalization
from tensorflow.keras.models  import Sequential, Model, load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img, array_to_img
import pickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import gc
import os
import random
import math
import efficientnet.tfkeras as efn

In [2]:
# Create a TF1-compat session with the options below and register it as the
# session Keras uses.
from tensorflow.compat.v1.keras.backend import set_session
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
config.log_device_placement = True  # to log device placement (on which device the operation ran)
sess = tf.compat.v1.Session(config=config)
set_session(sess)  # set this TensorFlow session as the default session for Keras

Device mapping:
/job:localhost/replica:0/task:0/device:XLA_CPU:0 -> device: XLA_CPU device
/job:localhost/replica:0/task:0/device:XLA_GPU:0 -> device: XLA_GPU device
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1



In [3]:
# Inference configuration shared by the cells below.
# Batch size passed to the test-image iterators.
BATCH_SIZE = 70
# Images are resized to IMG_SIZE x IMG_SIZE; also the model's input resolution.
IMG_SIZE = 512
# Width of the model's sigmoid output layer.
NUM_CLASSES = 7
# Root folder holding the csv_file/, test/ and models/ sub-directories.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
ROOT_PATH = '/home/ryan/Machine_Learning/AI4VN'
# Weights file, loaded from ROOT_PATH/models/.
MODEL_NAME = "EfnB6_multi_label_model_combine_1.h5"
# NOTE(review): get_result() reads a THRESHOLD constant that no visible cell
# defines — confirm where it is supposed to be set.

In [4]:
# Test-set manifest; its "image_id" column is used as the filename column by the
# image iterators and is copied into the final result.
test_df = pd.read_csv(ROOT_PATH + '/csv_file/' + "test.csv")

In [5]:
# Generator used for test-time augmentation: rescales pixels by 1/255 and applies
# random rotation, shifts, brightness changes, zoom and horizontal flips.
_datagen_test_aug = ImageDataGenerator(rescale = 1./255.,
                                  fill_mode = "nearest",
                                    rotation_range=10,
                                    width_shift_range=0.1,
                                    height_shift_range=0.1,
                                    brightness_range=[0.8,1.2],
                                    zoom_range=[0.8,1.3],
                                    horizontal_flip=True)

# Plain generator: rescales pixels by 1/255 only, no augmentation.
_datagen_test = ImageDataGenerator(rescale = 1./255.)

--- Logging error ---
Traceback (most recent call last):
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/logging/__init__.py", line 1025, in emit
    msg = self.format(record)
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/logging/__init__.py", line 869, in format
    return fmt.format(record)
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/logging/__init__.py", line 608, in format
    record.message = record.getMessage()
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/logging/__init__.py", line 369, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
 

Found 19990 validated image filenames.


In [None]:
def get_generator(aug = False):
    """Build an unshuffled batch iterator over the test images.

    Args:
        aug: when truthy, images are produced by ``_datagen_test_aug`` (random
            augmentation); otherwise by ``_datagen_test`` (rescaling only).

    Returns:
        The iterator returned by ``flow_from_dataframe`` for ``test_df``. It is
        configured with ``class_mode=None`` and ``shuffle=False``, batches of
        ``BATCH_SIZE`` and a target size of ``IMG_SIZE`` x ``IMG_SIZE``.
    """
    # Choose the generator first so exactly one iterator is built. Previously the
    # plain iterator was always constructed and then thrown away when aug=True,
    # which processed the whole dataframe twice.
    datagen = _datagen_test_aug if aug else _datagen_test
    return datagen.flow_from_dataframe(
                    dataframe=test_df,
                    directory=ROOT_PATH +'/'+"test",
                    x_col="image_id",
                    y_col=None,
                    has_ext=True,
                    class_mode=None,
                    batch_size=BATCH_SIZE,
                    seed=42,
                    shuffle=False,
                    target_size=(IMG_SIZE, IMG_SIZE))

In [6]:
# EfficientNet-B6 backbone (ImageNet weights, no classification top, average
# pooling) followed by dropout and a NUM_CLASSES-wide sigmoid output layer.
base_model =  efn.EfficientNetB6(weights='imagenet', include_top=False, pooling='avg', input_shape=(IMG_SIZE, IMG_SIZE, 3))
x = base_model.output
_x = (Dropout(0.3))(x)
predictions = Dense(NUM_CLASSES, activation="sigmoid")(_x)
model =  Model(inputs=base_model.input, outputs=predictions)

In [7]:
# Load the weights stored at ROOT_PATH/models/MODEL_NAME into the model built above.
model.load_weights(ROOT_PATH + '/' + "models/" + MODEL_NAME)

In [8]:
def predict():
    """Run the model once over the un-augmented test images.

    Returns:
        The array returned by ``model.predict`` for the iterator produced by
        ``get_generator()``.
    """
    print('Starting predict ....')
    test_generator = get_generator()
    # Model.predict accepts generators directly; predict_generator is deprecated
    # (see the "Please use Model.predict" notice emitted by this notebook).
    y_pred = model.predict(test_generator, verbose = 1, workers = 4, use_multiprocessing = True)
    return y_pred

Instructions for updating:
Please use Model.predict, which supports generators.


In [8]:
#test time augmentation
def test_time_augmentation(num_images, repeat = 10):
    """Average the model's predictions over several augmented passes.

    Args:
        num_images: number of rows in the returned array; only the first
            ``num_images`` prediction rows of each pass are used.
        repeat: number of prediction passes over the augmented iterator.

    Returns:
        A ``(num_images, NUM_CLASSES)`` float array holding the mean of the
        per-pass predictions.

    Raises:
        ValueError: if ``repeat`` is smaller than 1 (the mean would be undefined).
        IndexError: if a pass yields fewer rows or columns than requested.
    """
    if repeat < 1:
        raise ValueError("repeat must be at least 1, got {}".format(repeat))
    print("test time augmentation with {} times".format(repeat))
    sum_prob = np.zeros((num_images, NUM_CLASSES))
    test_generator = get_generator(aug = True)
    for _ in range(repeat):
        # Model.predict accepts generators directly; predict_generator is deprecated.
        prob = np.asarray(model.predict(test_generator, verbose = 1, workers = 4, use_multiprocessing = True))
        # Fail loudly instead of letting NumPy broadcast a too-small result.
        if prob.ndim != 2 or prob.shape[0] < num_images or prob.shape[1] < NUM_CLASSES:
            raise IndexError(
                "prediction shape {} does not cover ({}, {})".format(prob.shape, num_images, NUM_CLASSES))
        # Vectorised accumulation replaces the per-element Python loops.
        sum_prob += prob[:num_images, :NUM_CLASSES]
    sum_prob /= repeat
    return sum_prob

In [9]:
# Averaged test-time-augmentation predictions, one row per row of test_df;
# read later by get_result().
y_pred = test_time_augmentation(len(test_df))

test time augmentation with 10 times
Instructions for updating:
Please use Model.predict, which supports generators.


In [10]:
def get_result(get_pred = False, threshold = None, predictions = None, frame = None):
    """Convert per-class probabilities into an ``image_id`` / ``label`` mapping.

    For each image the label is ``0`` when its highest probability is below
    ``threshold``; otherwise it is the 1-based index of the highest probability.

    Args:
        get_pred: when truthy, also add one ``class_<k>`` entry per class (k is
            1-based) holding that class's probabilities rounded to 3 decimals.
        threshold: minimum top probability for a non-zero label. Defaults to the
            notebook-level ``THRESHOLD``.
        predictions: 2-D array-like of shape (images, classes). Defaults to the
            notebook-level ``y_pred``.
        frame: DataFrame with an ``image_id`` column. Defaults to the
            notebook-level ``test_df``.

    Returns:
        A dict with list-valued ``'image_id'`` and ``'label'`` entries, plus the
        ``class_<k>`` arrays when ``get_pred`` is truthy.

    Raises:
        NameError: if ``threshold`` is omitted and ``THRESHOLD`` is not defined.
        ValueError: if ``predictions`` is not 2-D or has fewer rows than ``frame``.
    """
    if threshold is None:
        try:
            threshold = THRESHOLD
        except NameError:
            raise NameError(
                "THRESHOLD is not defined; set it in the configuration cell "
                "or pass threshold=... explicitly") from None
    if predictions is None:
        predictions = y_pred
    if frame is None:
        frame = test_df

    num_rows = len(frame)
    probs = np.asarray(predictions)
    if probs.ndim != 2 or probs.shape[0] < num_rows:
        raise ValueError(
            "predictions of shape {} do not cover {} images".format(probs.shape, num_rows))
    # Only the first len(frame) rows are consumed, as in the original loops.
    probs = probs[:num_rows]

    image_id = frame['image_id'].tolist()
    # 0 means "no class reached the threshold"; real classes are numbered from 1.
    label = np.where(probs.max(axis=1) < threshold, 0, probs.argmax(axis=1) + 1).tolist()

    # 'result' rather than 'dict' so the builtin is not shadowed.
    result = {'image_id': image_id, 'label': label}
    if get_pred:
        # The original filled this table using an index variable that was not yet
        # bound, which raised UnboundLocalError on every call. Transposing gives
        # one row per class.
        y_class = np.around(probs, 3).astype(np.float64).T
        for k in range(y_class.shape[0]):
            result['class_{}'.format(k+1)] = y_class[k]
    return result

In [12]:
# Build the result mapping and write it as a tab-separated file with no header
# and no index column. Named 'result' so the builtin 'dict' is not shadowed for
# the rest of the kernel session.
result = get_result()
df = pd.DataFrame(result)
df.to_csv("efnB6_multi_label_combine_tta.csv", index = False, header = False, sep = '\t')