In [15]:
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D,Dense,Dropout,BatchNormalization
from tensorflow.keras.models  import Sequential, Model, load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img, array_to_img
import pickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import gc
import os
import random
import math
import efficientnet.tfkeras as efn

In [2]:
from tensorflow.compat.v1.keras.backend import set_session
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
config.log_device_placement = True  # to log device placement (on which device the operation ran)
sess = tf.compat.v1.Session(config=config)
set_session(sess)  # set this TensorFlow session as the default session for Keras

Device mapping:
/job:localhost/replica:0/task:0/device:XLA_CPU:0 -> device: XLA_CPU device
/job:localhost/replica:0/task:0/device:XLA_GPU:0 -> device: XLA_GPU device
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1



In [3]:
from tensorflow.keras.applications.resnet50 import preprocess_input

In [4]:
BATCH_SIZE = 64
IMG_SIZE = 512
NUM_CLASSES = 8
ROOT_PATH = '/home/ryan/Machine_Learning/AI4VN'
IMAGE_PATH = '/home/ryan/Machine_Learning/AI4VN/train'
FOLD_PATH = '/home/ryan/Machine_Learning/AI4VN/fold'
INITAL_EPOCHS = 10
EPOCHS = 30
SPLIT = 0.2
FOLD = 0

In [5]:
train_df = pd.read_csv(ROOT_PATH + '/' + "train.csv")
print(len(train_df))

16000


In [6]:
train = pd.read_csv(FOLD_PATH + '/' + "train_fold_{}.csv".format(FOLD))
val = pd.read_csv(FOLD_PATH + '/' + "valid_fold_{}.csv".format(FOLD))
train['label'] = train['label'].astype('str')
val['label'] = val['label'].astype('str')
print("Length of train = {}, valid = {}".format(len(train), len(val)))

Length of train = 12798, valid = 3202


In [7]:
_datagen_train = ImageDataGenerator(rescale = 1./255.,
            fill_mode = "nearest",
            featurewise_center=False,
            featurewise_std_normalization=False,
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            brightness_range=[0.5,1.2],
            zoom_range=[0.5,1.3],
            horizontal_flip=True,
            )

_datagen_val = ImageDataGenerator(rescale = 1./255.,
            horizontal_flip=True,
            fill_mode = "nearest"
            )

In [8]:
train_generator = _datagen_train.flow_from_dataframe(
            dataframe=train,
            directory=IMAGE_PATH,
            x_col="image_id",
            y_col="label",
            has_ext=False,
            batch_size=BATCH_SIZE,
            seed=42,
            shuffle=True,
            class_mode="categorical",
            target_size=(IMG_SIZE, IMG_SIZE))

val_generator = _datagen_val.flow_from_dataframe(
            dataframe=val,
            directory=IMAGE_PATH,
            x_col="image_id",
            y_col="label",
            has_ext=False,
            batch_size=BATCH_SIZE,
            seed=42,
            shuffle=True,
            class_mode="categorical",
            target_size=(IMG_SIZE, IMG_SIZE))

Found 12798 validated image filenames belonging to 8 classes.
Found 3202 validated image filenames belonging to 8 classes.


--- Logging error ---
Traceback (most recent call last):
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/logging/__init__.py", line 1025, in emit
    msg = self.format(record)
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/logging/__init__.py", line 869, in format
    return fmt.format(record)
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/logging/__init__.py", line 608, in format
    record.message = record.getMessage()
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/logging/__init__.py", line 369, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
 

In [9]:
base_model =  efn.EfficientNetB6(weights='imagenet', include_top=False, pooling='avg', input_shape=(IMG_SIZE, IMG_SIZE, 3))
x = base_model.output
_x = (Dropout(0.3))(x)
predictions = Dense(NUM_CLASSES, activation="softmax")(_x)
model =  Model(inputs=base_model.input, outputs=predictions)

In [10]:
base_model.trainable = False

In [11]:
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 512, 512, 3) 0                                            
__________________________________________________________________________________________________
stem_conv (Conv2D)              (None, 256, 256, 56) 1512        input_1[0][0]                    
__________________________________________________________________________________________________
stem_bn (BatchNormalization)    (None, 256, 256, 56) 224         stem_conv[0][0]                  
__________________________________________________________________________________________________
stem_activation (Activation)    (None, 256, 256, 56) 0           stem_bn[0][0]                    
_______________________________________________________________________________________

In [20]:
def categorical_focal_loss(gamma=2., alpha=.25):
    """
    Softmax version of focal loss.
           m
      FL = ∑  -alpha * (1 - p_o,c)^gamma * y_o,c * log(p_o,c)
          c=1
      where m = number of classes, c = class and o = observation
    Parameters:
      alpha -- the same as weighing factor in balanced cross entropy
      gamma -- focusing parameter for modulating factor (1-p)
    Default value:
      gamma -- 2.0 as mentioned in the paper
      alpha -- 0.25 as mentioned in the paper
    References:
        Official paper: https://arxiv.org/pdf/1708.02002.pdf
        https://www.tensorflow.org/api_docs/python/tf/keras/backend/categorical_crossentropy
    Usage:
     model.compile(loss=[categorical_focal_loss(alpha=.25, gamma=2)], metrics=["accuracy"], optimizer=adam)
    """
    def categorical_focal_loss_fixed(y_true, y_pred):
        """
        :param y_true: A tensor of the same shape as `y_pred`
        :param y_pred: A tensor resulting from a softmax
        :return: Output tensor.
        """

        # Scale predictions so that the class probas of each sample sum to 1
        y_pred /= K.sum(y_pred, axis=-1, keepdims=True)

        # Clip the prediction value to prevent NaN's and Inf's
        epsilon = K.epsilon()
        y_pred = K.clip(y_pred, epsilon, 1. - epsilon)

        # Calculate Cross Entropy
        cross_entropy = -y_true * K.log(y_pred)

        # Calculate Focal Loss
        loss = alpha * K.pow(1 - y_pred, gamma) * cross_entropy

        # Compute mean loss in mini_batch
        return K.mean(loss, axis=1)

    return categorical_focal_loss_fixed

In [21]:
opt = tf.keras.optimizers.Adam(lr = 1e-3, decay = 3e-4)
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
cb = ModelCheckpoint('EfnB6_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)
reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor = 0.5, patience = 10, 
                                   verbose=1, mode='auto', min_delta = 0.0001, cooldown=0, min_lr=1e-5)
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20)
model.compile(optimizer=opt, loss=categorical_focal_loss(alpha=.25, gamma=2), metrics=['accuracy'])

In [None]:
training = model.fit_generator(generator=train_generator
                                ,epochs=INITAL_EPOCHS,
                                workers = 4,
                                use_multiprocessing = True
                                ,steps_per_epoch=train_generator.samples//BATCH_SIZE
                                ,validation_data=val_generator
                                ,validation_steps=val_generator.samples//BATCH_SIZE
                                ,callbacks=[es, cb, reduceLROnPlat])

Epoch 1/10

Process Keras_worker_ForkPoolWorker-20:
Process Keras_worker_ForkPoolWorker-18:
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/site-packages/tensorflow/python/keras/utils/data_utils.py", line 679, in get_index
    return _SHARED_SEQUENCES[uid][i]


In [56]:
base_model.trainable = False

fine_tune_at = 30

for layer in base_model.layers[-fine_tune_at:]:
    layer.trainable =  True

In [58]:
opt = tf.keras.optimizers.Adam(lr = 1e-4, decay = 3e-4)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
cb = ModelCheckpoint('EfnB6_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)
reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor = 0.5, patience = 10, 
                                   verbose=1, mode='auto', min_delta = 0.0001, cooldown=0, min_lr=1e-5)
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20)
model.compile(optimizer=opt, loss=categorical_focal_loss(alpha=.25, gamma=2), metrics=['accuracy'])

In [60]:
BATCH_SIZE_FT = 32
train_generator = _datagen_train.flow_from_dataframe(
            dataframe=train,
            directory=IMAGE_PATH,
            x_col="image_id",
            y_col="label",
            has_ext=False,
            batch_size=BATCH_SIZE_FT,
            seed=42,
            shuffle=True,
            class_mode="categorical",
            target_size=(IMG_SIZE, IMG_SIZE))

val_generator = _datagen_val.flow_from_dataframe(
            dataframe=val,
            directory=IMAGE_PATH,
            x_col="image_id",
            y_col="label",
            has_ext=False,
            batch_size=BATCH_SIZE_FT,
            seed=42,
            shuffle=True,
            class_mode="categorical",
            target_size=(IMG_SIZE, IMG_SIZE))

Found 12800 validated image filenames belonging to 8 classes.
Found 3200 validated image filenames belonging to 8 classes.


--- Logging error ---
Traceback (most recent call last):
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/logging/__init__.py", line 1025, in emit
    msg = self.format(record)
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/logging/__init__.py", line 869, in format
    return fmt.format(record)
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/logging/__init__.py", line 608, in format
    record.message = record.getMessage()
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/logging/__init__.py", line 369, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
 

In [61]:
training = model.fit_generator(generator=train_generator
                                ,epochs=EPOCHS,
                                workers = 4,
                                use_multiprocessing = True
                                ,steps_per_epoch=train_generator.samples//BATCH_SIZE_FT
                                ,validation_data=val_generator
                                ,validation_steps=val_generator.samples//BATCH_SIZE_FT
                                ,callbacks=[es, cb, reduceLROnPlat])

Epoch 1/20

Epoch 00001: val_accuracy improved from -inf to 0.87750, saving model to EfnB6_model.h5
Epoch 2/20

Epoch 00002: val_accuracy improved from 0.87750 to 0.88063, saving model to EfnB6_model.h5
Epoch 3/20

Epoch 00003: val_accuracy improved from 0.88063 to 0.89469, saving model to EfnB6_model.h5
Epoch 4/20

Epoch 00004: val_accuracy did not improve from 0.89469
Epoch 5/20

Epoch 00005: val_accuracy improved from 0.89469 to 0.89969, saving model to EfnB6_model.h5
Epoch 6/20

Epoch 00006: val_accuracy improved from 0.89969 to 0.90469, saving model to EfnB6_model.h5
Epoch 7/20

Epoch 00007: val_accuracy improved from 0.90469 to 0.90812, saving model to EfnB6_model.h5
Epoch 8/20

Epoch 00008: val_accuracy improved from 0.90812 to 0.91500, saving model to EfnB6_model.h5
Epoch 9/20

Epoch 00009: val_accuracy did not improve from 0.91500
Epoch 10/20

Epoch 00010: val_accuracy improved from 0.91500 to 0.91562, saving model to EfnB6_model.h5
Epoch 11/20

Epoch 00011: val_accuracy impro

In [62]:
model.load_weights(ROOT_PATH + '/' + "EfnB6_model.h5")

In [63]:
test_df = pd.read_csv(ROOT_PATH + '/' + "test.csv")

In [64]:
_datagen_test = ImageDataGenerator(rescale = 1./255.)
test_generator = _datagen_test.flow_from_dataframe(
            dataframe=test_df,
            directory=ROOT_PATH +'/'+"test",
            x_col="image_id",
            y_col=None,
            has_ext=True,
            subset="training",
            class_mode=None,
            batch_size=BATCH_SIZE,
            seed=42,
            shuffle=False,
            target_size=(IMG_SIZE, IMG_SIZE))

Found 8654 validated image filenames.


--- Logging error ---
Traceback (most recent call last):
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/logging/__init__.py", line 1025, in emit
    msg = self.format(record)
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/logging/__init__.py", line 869, in format
    return fmt.format(record)
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/logging/__init__.py", line 608, in format
    record.message = record.getMessage()
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/logging/__init__.py", line 369, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/ryan/miniconda3/envs/tuan/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
 

In [65]:
y_pred = model.predict_generator(test_generator)
y_pred = np.argmax(y_pred, axis=1)

Instructions for updating:
Please use Model.predict, which supports generators.


In [66]:
print(len(y_pred))

8654


In [67]:
len(test_df)

8654

In [68]:
image_id = []
label = []
for i in range(len(test_df)):
    image_id.append(test_df['image_id'][i])
    label.append(y_pred[i])

In [69]:
dict = {'image_id': image_id, 'label': label}

In [70]:
df = pd.DataFrame(dict)

In [71]:
df.to_csv("submission_efnb6.csv", index = False, header = False, sep = '\t')