In [1]:
!pip install -q efficientnet

In [2]:
import os
import gc
import re

import cv2
import math
import numpy as np
import scipy as sp
import pandas as pd

import tensorflow as tf
from tensorflow import keras
import tensorflow_addons as tfa
from IPython.display import SVG
import efficientnet.tfkeras as efn
from keras.utils import plot_model
import tensorflow.keras.layers as L
from keras.utils import model_to_dot
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from kaggle_datasets import KaggleDatasets
from tensorflow.keras.applications import DenseNet121

import seaborn as sns
from tqdm import tqdm
import matplotlib.cm as cm
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MultiLabelBinarizer

tqdm.pandas()
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

np.random.seed(0)
tf.random.set_seed(0)

import warnings
warnings.filterwarnings("ignore")

In [3]:
# Number of epochs passed to model.fit.
EPOCHS = 20
# NOTE(review): not referenced in the visible cells - confirm it is still needed.
SAMPLE_LEN = 100
# Candidate square input resolutions (pixels).
# Presumably the native EfficientNet B0..B7 sizes - TODO confirm.
IMSIZES = (224, 240, 260, 300, 380, 456, 528, 600)
# Side length used for image resizing and for the model input; index 7 selects 600.
im_size = IMSIZES[7]
# Seed reused by get_model (TF seed, dense-layer initializer) and in the checkpoint file name.
# Note that the import cell seeds NumPy and TF with 0, not with this value.
SEED = 42
# Learning rate handed to Adam; the LearningRateScheduler callback used during
# training sets the rate again at the start of every epoch.
LR = 0.0001
# Local (non-GCS) directory of the training images; used by load_image.
IMAGE_PATH = "../input/plant-pathology-2021-fgvc8/train_images/"
# Disabled leftover pointing at the 2020 competition data:
# TEST_PATH = "../input/plant-pathology-2020-fgvc7/test.csv"
TRAIN_PATH = "../input/plant-pathology-2021-fgvc8/train.csv"
SUB_PATH = "../input/plant-pathology-2021-fgvc8/sample_submission.csv"

# The sample submission doubles as the list of test images.
sub = pd.read_csv(SUB_PATH)
test_data = sub.copy()
train_data = pd.read_csv(TRAIN_PATH)

In [4]:
train_data

Unnamed: 0,image,labels
0,800113bb65efe69e.jpg,healthy
1,8002cb321f8bfcdf.jpg,scab frog_eye_leaf_spot complex
2,80070f7fb5e2ccaa.jpg,scab
3,80077517781fb94f.jpg,scab
4,800cbf0ff87721f8.jpg,complex
...,...,...
18627,fffb900a92289a33.jpg,healthy
18628,fffc488fa4c0e80c.jpg,scab
18629,fffc94e092a59086.jpg,rust
18630,fffe105cf6808292.jpg,scab frog_eye_leaf_spot


In [5]:
def load_image(image_id):
    """Read one training image from local disk and return it in RGB order.

    Args:
        image_id: File name of the image, appended to ``IMAGE_PATH``.

    Returns:
        The image decoded by OpenCV, converted from BGR to RGB.

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
    """
    path = IMAGE_PATH + image_id
    image = cv2.imread(path)
    # cv2.imread does not raise on a missing/corrupt file; it returns None,
    # which would otherwise surface as an opaque OpenCV error in cvtColor.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# train_images = train_data["image"].progress_apply(load_image)

In [6]:
## TPU setup

# Let tf.data choose parallelism and prefetch buffer sizes automatically.
AUTO = tf.data.experimental.AUTOTUNE
# Called without arguments, so the TPU address must come from the runtime environment.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()

# Connect to the TPU cluster and initialise it before building the strategy.
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
# NOTE(review): newer TensorFlow releases expose this as tf.distribute.TPUStrategy -
# confirm against the TF version in use before switching.
strategy = tf.distribute.experimental.TPUStrategy(tpu)

# Global batch size: 16 samples for each replica in the strategy.
BATCH_SIZE = 16 * strategy.num_replicas_in_sync


In [7]:
# Google Cloud Storage location of the competition dataset; image paths for the
# tf.data pipelines are built on top of it (the recorded output is a gs:// URI).
GCS_DS_PATH = KaggleDatasets().get_gcs_path('plant-pathology-2021-fgvc8')
GCS_DS_PATH

'gs://kds-d474f02d2d8030b495dff957ec10bb3ee88a2dd26d6e1dff43c81969'

In [8]:
# Training preprocessing: multi-hot label columns, GCS image paths, train/valid split.

# Split the space-separated label string into a list of class names. Rows that
# are already lists (cell executed before) are left as they are, so re-running
# the cell does not fail on `.split`.
train_data['labels'] = train_data['labels'].apply(
    lambda labels: labels.split(' ') if isinstance(labels, str) else labels
)
mlb = MultiLabelBinarizer()
df = pd.DataFrame(mlb.fit_transform(train_data['labels']), columns=mlb.classes_)
# Remove indicator columns left over from a previous run before appending the
# fresh ones; otherwise a re-run would produce duplicate column names.
train_data = pd.concat(
    [train_data.drop(columns=list(mlb.classes_), errors='ignore'), df],
    axis=1,
)


def format_path(st, folder='train_images'):
    """Build the GCS path of an image file.

    Args:
        st: Image file name (e.g. ``"800113bb65efe69e.jpg"``).
        folder: Dataset sub-folder that holds the image. Defaults to
            ``'train_images'`` so existing single-argument calls are unchanged.

    Returns:
        ``GCS_DS_PATH`` joined with ``folder`` and ``st`` by ``'/'``.
    """
    return GCS_DS_PATH + '/' + folder + '/' + st


# Test images live in their own folder; the previous code pointed them at
# `/train_images/`.
test_paths = test_data.image.apply(format_path, folder='test_images').values
train_paths = train_data.image.apply(format_path).values

# Select the indicator columns by name (in `mlb.classes_` order) rather than by
# a positional 'complex':'scab' slice that silently depends on column layout.
train_labels = np.float32(train_data[list(mlb.classes_)].values)
train_paths, valid_paths, train_labels, valid_labels = train_test_split(
    train_paths, train_labels, test_size=0.1, random_state=42
)
train_labels

array([[0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       ...,
       [0., 1., 0., 0., 0., 1.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1.]], dtype=float32)

In [9]:
train_data

Unnamed: 0,image,labels,complex,frog_eye_leaf_spot,healthy,powdery_mildew,rust,scab
0,800113bb65efe69e.jpg,[healthy],0,0,1,0,0,0
1,8002cb321f8bfcdf.jpg,"[scab, frog_eye_leaf_spot, complex]",1,1,0,0,0,1
2,80070f7fb5e2ccaa.jpg,[scab],0,0,0,0,0,1
3,80077517781fb94f.jpg,[scab],0,0,0,0,0,1
4,800cbf0ff87721f8.jpg,[complex],1,0,0,0,0,0
...,...,...,...,...,...,...,...,...
18627,fffb900a92289a33.jpg,[healthy],0,0,1,0,0,0
18628,fffc488fa4c0e80c.jpg,[scab],0,0,0,0,0,1
18629,fffc94e092a59086.jpg,[rust],0,0,0,0,1,0
18630,fffe105cf6808292.jpg,"[scab, frog_eye_leaf_spot]",0,1,0,0,0,1


In [10]:
def decode_image(filename, label=None, image_size=(im_size, im_size)):
    """Read a JPEG file and return it as a resized float image.

    Args:
        filename: Path of the JPEG file, read with ``tf.io.read_file``.
        label: Optional label that is passed through unchanged.
        image_size: ``(height, width)`` handed to ``tf.image.resize``.

    Returns:
        The image alone when ``label`` is ``None``, otherwise ``(image, label)``.
        The image has 3 channels, is cast to float32 and divided by 255.

    NOTE(review): the tf.keras.applications EfficientNet models are documented
    to rescale their inputs internally and to expect pixels in [0, 255];
    get_model feeds this [0, 1] image straight into EfficientNetB7 - confirm
    whether the extra scaling is intended.
    """
    raw_bytes = tf.io.read_file(filename)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    scaled = tf.cast(pixels, tf.float32) / 255.0
    resized = tf.image.resize(scaled, image_size)

    # Labelled samples keep their label alongside the image.
    if label is not None:
        return resized, label
    return resized

def data_augment(img, label=None):
    """Randomly flip, colour-jitter and rotate an image by a multiple of 90 degrees.

    Args:
        img: Image tensor to augment.
        label: Optional label that is passed through unchanged.

    Returns:
        The augmented image alone when ``label`` is ``None``, otherwise
        ``(image, label)``.
    """
    img = tf.image.random_flip_left_right(img)
    img = tf.image.random_flip_up_down(img)

    img = tf.image.random_brightness(img, 0.3)
    img = tf.image.random_contrast(img, 0.7, 1.3)
    img = tf.image.random_saturation(img, 0.7, 1.3)
    img = tf.image.random_hue(img, 0.1)

    # The rotation count must come from a TF op. This function is used with
    # Dataset.map, which traces it once; a NumPy draw would be evaluated a
    # single time at trace time and baked in as a constant, giving every image
    # the same rotation. The sampled range {0, 1, 2} matches the previous
    # np.random.randint(0, 3) call.
    k90 = tf.random.uniform([], minval=0, maxval=3, dtype=tf.int32)
    img = tf.image.rot90(img, k=k90)

    if label is None:
        return img
    return img, label
    
    

# Training pipeline.
# Shuffling happens on the (path, label) pairs, before decoding and before
# `repeat()`:
#   * the buffer covers the whole training split, so each epoch is a full
#     permutation instead of a 512-element sliding window;
#   * the buffer holds short path strings rather than 512 decoded float images;
#   * with shuffle ahead of repeat, samples of different epochs are not mixed.
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((train_paths, train_labels))
    .shuffle(len(train_paths))
    .repeat()
    .map(decode_image, num_parallel_calls=AUTO)
    .map(data_augment, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)

# Validation pipeline: no augmentation, no shuffling; decoded batches are
# cached after the first pass.
valid_dataset = (
    tf.data.Dataset
    .from_tensor_slices((valid_paths, valid_labels))
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
    .cache()
    .prefetch(AUTO)
)


In [11]:
train_dataset

<PrefetchDataset shapes: ((None, 600, 600, 3), (None, 6)), types: (tf.float32, tf.float32)>

In [12]:
def build_lrfn(lr_start=0.00001, lr_max=0.00005,
               lr_min=0.00001, lr_rampup_epochs=5,
               lr_sustain_epochs=0, lr_exp_decay=.8):
    """Create an epoch -> learning-rate function: linear warm-up, hold, exponential decay.

    Args:
        lr_start: Learning rate at epoch 0.
        lr_max: Peak rate per replica; multiplied by
            ``strategy.num_replicas_in_sync`` before use.
        lr_min: Value the decay phase approaches.
        lr_rampup_epochs: Number of epochs of linear warm-up.
        lr_sustain_epochs: Number of epochs held at the peak rate.
        lr_exp_decay: Per-epoch multiplicative decay factor.

    Returns:
        A function ``lrfn(epoch)`` returning the learning rate for that epoch.
    """
    peak_lr = lr_max * strategy.num_replicas_in_sync
    hold_until = lr_rampup_epochs + lr_sustain_epochs

    def lrfn(epoch):
        # Warm-up: straight line from lr_start towards the peak.
        if epoch < lr_rampup_epochs:
            return (peak_lr - lr_start) / lr_rampup_epochs * epoch + lr_start
        # Plateau at the peak rate.
        if epoch < hold_until:
            return peak_lr
        # Decay: the gap between peak and floor shrinks geometrically.
        epochs_decayed = epoch - lr_rampup_epochs - lr_sustain_epochs
        return (peak_lr - lr_min) * lr_exp_decay**epochs_decayed + lr_min

    return lrfn




# Steps per epoch: training-set size floor-divided by the global batch size.
STEPS_PER_EPOCH = len(train_labels) // BATCH_SIZE
# Schedule built with the default warm-up / decay settings, wrapped in a Keras
# callback that logs each change.
lrfn = build_lrfn()
lr_schedule = tf.keras.callbacks.LearningRateScheduler(schedule=lrfn, verbose=1)


In [13]:
# Build and compile the EfficientNetB7-based multi-label classifier.
def get_model():
    """Create and compile the model inside the distribution strategy scope.

    The model is an ImageNet-initialised EfficientNetB7 backbone (no top),
    global average pooling and a 6-unit sigmoid layer named ``dense_top``. It
    is compiled with binary cross-entropy, Adam at ``LR`` and the metrics
    macro F1 and ``'accuracy'``.

    Returns:
        The compiled ``tf.keras.Sequential`` model.

    NOTE(review): the tf.keras.applications EfficientNet models are documented
    to expect [0, 255] inputs, while decode_image divides pixels by 255 -
    confirm whether the input range is intended.
    NOTE(review): with a 6-wide output Keras may resolve the ``'accuracy'``
    string to categorical accuracy rather than per-label binary accuracy -
    confirm which one is wanted.
    """
    with strategy.scope():

        tf.random.set_seed(SEED)
        backbone = tf.keras.applications.EfficientNetB7(
            weights="imagenet",
            include_top=False,
            input_shape=(im_size, im_size, 3),
        )
        final_model = tf.keras.Sequential([
            backbone,
            tf.keras.layers.GlobalAveragePooling2D(),
            keras.layers.Dense(
                6,
                kernel_initializer=keras.initializers.RandomUniform(seed=SEED),
                bias_initializer=keras.initializers.Zeros(),
                name='dense_top',
                activation='sigmoid',
            ),
        ])

        opt = tf.keras.optimizers.Adam(learning_rate=LR)

        # threshold=0.5 makes the metric binarise every sigmoid output on its
        # own. With the default threshold=None only the arg-max class is
        # counted as predicted, which mis-scores samples with several labels.
        f1 = tfa.metrics.F1Score(num_classes=6, average='macro', threshold=0.5)

        final_model.compile(
            loss=tf.keras.losses.BinaryCrossentropy(),
            optimizer=opt,
            metrics=[f1, 'accuracy'],
        )

        return final_model

model = get_model()
model.summary()

# Write weights only, and only for epochs that improve (lower) the validation loss.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=f'EffNetB7_EXP2_{SEED}.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# train_dataset repeats indefinitely, so steps_per_epoch defines the epoch length.
history = model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    callbacks=[lr_schedule, checkpoint],
)

# Save the full model as it stands after the last epoch (separate from the
# best-val_loss weights written by the checkpoint callback).
model.save("EffNetB7_Exp2_final.h5")


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb7_notop.h5
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb7 (Functional)  (None, 19, 19, 2560)      64097687  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2560)              0         
_________________________________________________________________
dense_top (Dense)            (None, 6)                 15366     
Total params: 64,113,053
Trainable params: 63,802,326
Non-trainable params: 310,727
_________________________________________________________________
Epoch 1/20

Epoch 00001: LearningRateScheduler reducing learning rate to 1e-05.

Epoch 00001: val_loss improved from inf to 0.56729, saving model to EffNetB7_EXP2_42.h5
Epoch 2/20

Epoch 00002: LearningRateScheduler reducing learning rate to 8.8e-05.

Epoch 00002: val_los

## Alternative