In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of each file.
for root_dir, _, names_in_dir in os.walk('/kaggle/input'):
    for file_name in names_in_dir:
        full_path = os.path.join(root_dir, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import os

# Read the competition's train and test CSV files into DataFrames.
train_df = pd.read_csv("../input/plant-pathology-2020-fgvc7/train.csv")
test_df = pd.read_csv("../input/plant-pathology-2020-fgvc7/test.csv")

In [None]:
# Preview the first rows of the training table.
train_df.head()


In [None]:
# Preview the first rows of the test table.
test_df.head()

In [None]:
# Row-wise total of the four label columns.
train_df['sum'] = (
    train_df['healthy']
    .add(train_df['multiple_diseases'])
    .add(train_df['rust'])
    .add(train_df['scab'])
)

# Show the rows whose total is greater than one or exactly zero.
train_df[train_df['sum'].gt(1) | train_df['sum'].eq(0)]

In [None]:
IMAGE_DIR = '/kaggle/input/plant-pathology-2020-fgvc7/images'

# Build the JPEG path for every training row from its image id.
train_df['path'] = f'{IMAGE_DIR}/' + train_df['image_id'] + '.jpg'
train_df.head()

In [None]:
def get_label(x):
    """Return the name of the first label column in ``x`` that equals 1.

    Columns are checked in the order 'healthy', 'multiple_diseases', 'rust',
    'scab'. If none of them equals 1, the string 'None' is returned.
    """
    for label_name in ('healthy', 'multiple_diseases', 'rust', 'scab'):
        if x[label_name] == 1:
            return label_name
    return 'None'

# Derive a single class-name column by applying get_label to each row.
train_df['label'] = train_df.apply(get_label, axis=1)

train_df.head()

In [None]:
# Print both table shapes (one per line), then the number of rows per label.
print(train_df.shape, test_df.shape, sep='\n')
label_counts = train_df['label'].value_counts()
print(label_counts)

In [None]:
import seaborn as sns
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

def show_grid_images(image_path_list, augmentor=None, ncols=4, title=None):
    """Show up to ``ncols`` images from ``image_path_list`` in a single row.

    Args:
        image_path_list: Sequence of image file paths read with ``cv2.imread``.
        augmentor: Optional callable invoked as ``augmentor(image=image)``; its
            ``'image'`` entry is displayed instead of the raw image.
        ncols: Number of subplot columns to create.
        title: Title set on every populated subplot.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read one of the paths.

    The shape of every displayed image is printed. When fewer than ``ncols``
    paths are supplied, the remaining axes are left blank.
    """
    # squeeze=False keeps the axes array 2-D so indexing also works for ncols == 1.
    _, axes_grid = plt.subplots(figsize=(22, 4), nrows=1, ncols=ncols, squeeze=False)
    axs = axes_grid[0]
    n_images = min(ncols, len(image_path_list))
    for i in range(ncols):
        axs[i].axis('off')
        if i >= n_images:
            # No image for this slot: leave the axis empty instead of raising IndexError.
            continue
        bgr_image = cv2.imread(image_path_list[i])
        if bgr_image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f'Could not read image: {image_path_list[i]}')
        image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        if augmentor is not None:
            image = augmentor(image=image)['image']
        axs[i].imshow(image)
        axs[i].set_title(title)
        print(image.shape)
        
# First six image paths for each label.
rust_image_list = train_df.loc[train_df['label'] == 'rust', 'path'].head(6).tolist()
scab_image_list = train_df.loc[train_df['label'] == 'scab', 'path'].head(6).tolist()
healthy_image_list = train_df.loc[train_df['label'] == 'healthy', 'path'].head(6).tolist()
multiple_image_list = train_df.loc[train_df['label'] == 'multiple_diseases', 'path'].head(6).tolist()

# One row of six un-augmented images per label.
show_grid_images(image_path_list=rust_image_list, ncols=6, title='rust')
show_grid_images(image_path_list=scab_image_list, ncols=6, title='scab')
show_grid_images(image_path_list=healthy_image_list, ncols=6, title='healthy')
show_grid_images(image_path_list=multiple_image_list, ncols=6, title='multiple')

In [None]:
import albumentations as A

# Augmentation pipeline: random flips, shift/scale/rotate, brightness/contrast and blur.
# NOTE(review): albumentations appears to treat a tuple scale_limit as an offset from 1,
# so (0.7, 0.9) would zoom in by roughly 1.7x-1.9x -- confirm this is intended.
augmentor_01 = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.ShiftScaleRotate(
        scale_limit=(0.7, 0.9),
        p=0.5,
        rotate_limit=30,
    ),
    A.RandomBrightnessContrast(
        brightness_limit=(-0.2, 0.2),
        contrast_limit=(-0.2, 0.2),
        p=0.5,
    ),
    A.Blur(p=0.2),
])

# Same per-label grids as before, now passed through the augmentor.
show_grid_images(rust_image_list, augmentor=augmentor_01, ncols=6, title='rust')
show_grid_images(scab_image_list, augmentor=augmentor_01, ncols=6, title='scab')
show_grid_images(healthy_image_list, augmentor=augmentor_01, ncols=6, title='healthy')
show_grid_images(multiple_image_list, augmentor=augmentor_01, ncols=6, title='multiple')

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import Sequence
import sklearn
import cv2

class Plant_Dataset(Sequence):
    """Keras ``Sequence`` that reads image files and yields ``(image_batch, label_batch)``.

    Args:
        image_filenames: Array-like of image file paths.
        labels: Array-like of labels aligned with ``image_filenames``, or ``None``
            for unlabeled data (``label_batch`` is then ``None``).
        image_size: ``(height, width)`` every image is resized to.
        batch_size: Maximum number of images per batch.
        augmentor: Optional callable invoked as ``augmentor(image=image)``; its
            ``'image'`` entry replaces the image before resizing.
        shuffle: When truthy, the data is reshuffled at construction time and at
            the end of every epoch.
        pre_func: Optional callable applied to each resized image.
    """
    def __init__(self, image_filenames, labels, image_size=(224,224),batch_size=64,augmentor=None, shuffle=False, pre_func=None):
        # Newer Keras releases expect Sequence subclasses to initialise the base class.
        super().__init__()
        # Store as ndarrays so slicing and permutation indexing work for lists and Series too.
        self.image_filenames = np.asarray(image_filenames)
        self.labels = None if labels is None else np.asarray(labels)
        self.image_size = image_size
        self.batch_size = batch_size
        self.augmentor = augmentor
        self.shuffle = shuffle
        self.pre_func = pre_func

        if self.shuffle:
            self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch (the last batch may be smaller)."""
        return int(np.ceil(len(self.image_filenames)/self.batch_size))

    def __getitem__(self, index):
        """Load, augment, resize and preprocess batch number ``index``.

        Returns:
            ``(image_batch, label_batch)`` where ``image_batch`` is a float32 array
            of shape ``(n, height, width, 3)`` and ``label_batch`` is the matching
            label slice, or ``None`` when the dataset has no labels.

        Raises:
            FileNotFoundError: If an image file cannot be read.
        """
        start = index*self.batch_size
        stop = (index+1)*self.batch_size
        image_name_batch = self.image_filenames[start:stop]
        label_batch = None if self.labels is None else self.labels[start:stop]

        image_batch = np.zeros((len(image_name_batch), self.image_size[0], self.image_size[1], 3), dtype='float32')

        for image_index, image_name in enumerate(image_name_batch):
            bgr_image = cv2.imread(image_name)
            if bgr_image is None:
                # cv2.imread returns None on failure; fail with the offending path.
                raise FileNotFoundError(f'Could not read image: {image_name}')
            image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
            if self.augmentor is not None:
                image = self.augmentor(image=image)['image']
            # cv2.resize takes (width, height), the reverse of image_size.
            image = cv2.resize(image, (self.image_size[1], self.image_size[0]))
            if self.pre_func is not None:
                image = self.pre_func(image)

            image_batch[image_index] = image

        return image_batch, label_batch

    def on_epoch_end(self):
        """Reshuffle filenames (and labels, if any) in unison when shuffling is enabled."""
        if not self.shuffle:
            return
        # One shared permutation keeps filenames and labels aligned and works without labels.
        order = np.random.permutation(len(self.image_filenames))
        self.image_filenames = self.image_filenames[order]
        if self.labels is not None:
            self.labels = self.labels[order]

In [None]:
# Load the sample submission file.
sample_df = pd.read_csv('/kaggle/input/plant-pathology-2020-fgvc7/sample_submission.csv')
# Only the last expression of a cell is rendered, so this head() produces no visible output.
sample_df.head()
train_df.head()

In [None]:
from sklearn.model_selection import train_test_split

def get_train_valid(train_df, valid_size=0.2, random_state=2021):
    """Split image paths and label columns into training and validation arrays.

    Reads the 'path' column and the 'healthy', 'multiple_diseases', 'rust',
    'scab' columns of ``train_df`` and passes them to ``train_test_split``
    with ``test_size=valid_size`` and the given ``random_state``.

    Returns:
        Tuple ``(tr_path, val_path, tr_label, val_label)``.
    """
    label_columns = ['healthy', 'multiple_diseases', 'rust', 'scab']
    image_paths = train_df['path'].values
    label_matrix = train_df[label_columns].values

    split_parts = train_test_split(
        image_paths,
        label_matrix,
        test_size=valid_size,
        random_state=random_state,
    )
    return tuple(split_parts)

In [None]:
from tensorflow.keras.applications.xception import preprocess_input as xcp_preprocess_input
from tensorflow.keras.applications.efficientnet import preprocess_input as eff_preprocess_input

IMAGE_SIZE = (224, 224)
BATCH_SIZE = 64

tr_path, val_path, tr_label, val_label = get_train_valid(train_df, valid_size=0.2, random_state=2021)

# Training set: augmented and shuffled. Validation set: neither.
tr_ds = Plant_Dataset(
    image_filenames=tr_path, labels=tr_label,
    image_size=IMAGE_SIZE, batch_size=BATCH_SIZE,
    augmentor=augmentor_01, shuffle=True, pre_func=xcp_preprocess_input,
)
val_ds = Plant_Dataset(
    image_filenames=val_path, labels=val_label,
    image_size=IMAGE_SIZE, batch_size=BATCH_SIZE,
    augmentor=None, shuffle=False, pre_func=xcp_preprocess_input,
)

# Pull the first batch from each dataset and print one preprocessed image from each.
tr_image_batch = next(iter(tr_ds))[0]
val_image_batch = next(iter(val_ds))[0]

print(tr_image_batch[0], val_image_batch[0])

In [None]:
from tensorflow.keras.models import Sequential , Model
from tensorflow.keras.layers import Input, Dense , Conv2D , Dropout , Flatten , Activation, MaxPooling2D , GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam , RMSprop 
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau , EarlyStopping , ModelCheckpoint , LearningRateScheduler
from tensorflow.keras.metrics import AUC

from tensorflow.keras.applications import Xception, ResNet50V2, EfficientNetB0, EfficientNetB1, EfficientNetB2, EfficientNetB3
from tensorflow.keras.applications import EfficientNetB4, EfficientNetB5, EfficientNetB6, EfficientNetB7
import tensorflow as tf

def create_model(model_type = 'efficientnetb0', in_shape=(224,224,3), n_classes=4):
    """Build an ImageNet-pretrained backbone topped with a small classification head.

    Args:
        model_type: One of 'resnet50v2', 'xception', or 'efficientnetb0' through
            'efficientnetb7'.
        in_shape: Shape passed to the Keras ``Input`` layer.
        n_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled Keras ``Model`` mapping the input tensor to class probabilities.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    # Map each accepted name to its class name in tf.keras.applications.
    backbone_names = {
        'resnet50v2': 'ResNet50V2',
        'xception': 'Xception',
    }
    backbone_names.update({f'efficientnetb{i}': f'EfficientNetB{i}' for i in range(8)})

    if model_type not in backbone_names:
        # Fail early with a clear message instead of an UnboundLocalError further down.
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected one of {sorted(backbone_names)}"
        )

    input_tensor = Input(shape=in_shape)

    # Look the class up lazily so only the requested backbone has to exist.
    backbone_cls = getattr(tf.keras.applications, backbone_names[model_type])
    base_model = backbone_cls(include_top=False, weights='imagenet', input_tensor=input_tensor)

    # Head: global average pooling -> 1024-unit ReLU -> dropout -> softmax.
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)
    preds = Dense(units=n_classes, activation='softmax')(x)

    model = Model(inputs=input_tensor, outputs=preds)

    return model

In [None]:
from tensorflow.keras.metrics import AUC

# Build and compile an Xception classifier for 224x224 RGB input.
xcp_model_01 = create_model(model_type='xception', in_shape=(224, 224, 3))
xcp_model_01.compile(
    optimizer=Adam(0.0001),
    loss='categorical_crossentropy',
    metrics=[AUC()],
)

# Both callbacks watch the validation loss.
rlr_cb = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, mode='min', verbose=1)
ely_cb = EarlyStopping(monitor='val_loss', patience=10, mode='min', verbose=1)

history = xcp_model_01.fit(
    tr_ds,
    epochs=10,
    steps_per_epoch=int(np.ceil(tr_path.shape[0] / BATCH_SIZE)),
    validation_data=val_ds,
    validation_steps=int(np.ceil(val_path.shape[0] / BATCH_SIZE)),
    callbacks=[rlr_cb, ely_cb],
    verbose=1,
)

In [None]:
# Load the sample submission file and preview its first rows.
sample_df = pd.read_csv('/kaggle/input/plant-pathology-2020-fgvc7/sample_submission.csv')
sample_df.head()

In [None]:
IMAGE_DIR = '/kaggle/input/plant-pathology-2020-fgvc7/images'

# Reload the test table and attach the JPEG path for each image id.
test_df = pd.read_csv('../input/plant-pathology-2020-fgvc7/test.csv')
test_df['path'] = f'{IMAGE_DIR}/' + test_df['image_id'] + '.jpg'
test_df.head()

In [None]:
test_path = test_df['path'].values

# Unlabeled, un-augmented dataset over the test images.
test_ds = Plant_Dataset(
    image_filenames=test_path,
    labels=None,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    augmentor=None,
    shuffle=None,
    pre_func=xcp_preprocess_input,
)

preds = xcp_model_01.predict(test_ds)

In [None]:
# Wrap the prediction array in a DataFrame with one named column per class.
preds_df = pd.DataFrame(preds, columns=['healthy', 'multiple_diseases', 'rust', 'scab'])
preds_df.head()

In [None]:
# Place the image ids next to the per-class predictions, column-wise.
submit_df = pd.concat([test_df['image_id'], preds_df], axis='columns')
submit_df.head()

In [None]:
# Write the submission table to the working directory without the index column.
submit_df.to_csv('submit_01.csv', index=False)

In [None]:
def make_submit_df(test_df, model):
    """Predict on the test images and return a submission DataFrame.

    Reads the module-level ``IMAGE_SIZE``, ``BATCH_SIZE`` and
    ``xcp_preprocess_input`` at call time.

    Args:
        test_df: DataFrame with 'image_id' and 'path' columns.
        model: Object whose ``predict`` accepts a ``Plant_Dataset`` and returns
            one row of four class scores per image.

    Returns:
        DataFrame with 'image_id' followed by 'healthy', 'multiple_diseases',
        'rust' and 'scab' columns.
    """
    test_path = test_df['path'].values
    # shuffle must stay off so predictions come back in the same order as test_df.
    test_ds = Plant_Dataset(image_filenames=test_path, labels=None, image_size=IMAGE_SIZE, batch_size=BATCH_SIZE,
                           augmentor=None, shuffle=False, pre_func=xcp_preprocess_input)

    preds = model.predict(test_ds)
    preds_df = pd.DataFrame(preds, columns=['healthy', 'multiple_diseases', 'rust', 'scab'])
    # concat(axis=1) aligns on index; reset it so ids pair with predictions by position
    # even when test_df carries a non-default index.
    image_ids = test_df['image_id'].reset_index(drop=True)
    submit_df = pd.concat([image_ids, preds_df], axis=1)
    return submit_df

In [None]:
from tensorflow.keras.applications.xception import preprocess_input as xcp_preprocess_input
from tensorflow.keras.applications.efficientnet import preprocess_input as eff_preprocess_input

IMAGE_SIZE = (320, 512)
BATCH_SIZE = 64

tr_path, val_path, tr_label, val_label = get_train_valid(train_df, valid_size=0.2, random_state=2021)

# Training set: augmented and shuffled. Validation set: neither.
tr_ds = Plant_Dataset(
    image_filenames=tr_path, labels=tr_label,
    image_size=IMAGE_SIZE, batch_size=BATCH_SIZE,
    augmentor=augmentor_01, shuffle=True, pre_func=xcp_preprocess_input,
)
val_ds = Plant_Dataset(
    image_filenames=val_path, labels=val_label,
    image_size=IMAGE_SIZE, batch_size=BATCH_SIZE,
    augmentor=None, shuffle=False, pre_func=xcp_preprocess_input,
)

# Fetch the first batch of each dataset, then print the batch shapes and one image from each.
tr_image_batch, tr_label_batch = next(iter(tr_ds))
val_image_batch, val_label_batch = next(iter(val_ds))
print(
    tr_image_batch.shape,
    val_image_batch.shape,
    tr_label_batch.shape,
    val_label_batch.shape,
)
print(tr_image_batch[0], val_image_batch[0])

In [None]:
from tensorflow.keras.applications.efficientnet import preprocess_input as eff_preprocess_input
from tensorflow.keras.applications.xception import preprocess_input as xcp_preprocess_input
import tensorflow as tf

def lrfn_01(epoch):
    """Learning rate for ``epoch``: linear ramp-up, hold, then stepwise decay.

    The rate rises linearly from ``LR_START`` to ``LR_MAX`` over
    ``LR_RAMPUP_EPOCHS`` epochs, stays at ``LR_MAX`` for ``LR_SUSTAIN_EPOCHS``
    epochs, and is then multiplied by ``LR_STEP_DECAY`` once every two epochs.

    Args:
        epoch: Zero-based epoch index.

    Returns:
        The learning rate as a float.
    """
    LR_START = 1e-5
    LR_MAX = 1e-4
    LR_RAMPUP_EPOCHS = 2
    LR_SUSTAIN_EPOCHS = 1
    LR_STEP_DECAY = 0.75

    if epoch < LR_RAMPUP_EPOCHS:
        return (LR_MAX-LR_START)/LR_RAMPUP_EPOCHS*epoch+LR_START
    if epoch < LR_RAMPUP_EPOCHS+LR_SUSTAIN_EPOCHS:
        return LR_MAX
    # Integer division by 2 applies one decay step per two epochs after the hold phase.
    return LR_MAX*LR_STEP_DECAY**((epoch-LR_RAMPUP_EPOCHS-LR_SUSTAIN_EPOCHS)//2)
def lrfn_02(epoch):
    """Learning rate for ``epoch``: linear ramp-up, hold, then stepwise decay.

    Ramps linearly from 1e-6 to 2e-5 over two epochs, holds at 2e-5 for one
    epoch, then multiplies by 0.75 once every two epochs.
    """
    LR_START = 1e-6
    LR_MAX = 2e-5
    LR_RAMPUP_EPOCHS = 2
    LR_SUSTAIN_EPOCHS = 1
    LR_STEP_DECAY = 0.75

    # Ramp-up phase.
    if epoch < LR_RAMPUP_EPOCHS:
        return (LR_MAX - LR_START) / LR_RAMPUP_EPOCHS * epoch + LR_START
    # Hold phase.
    if epoch < LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS:
        return LR_MAX
    # Decay phase: one step per two epochs.
    return LR_MAX * LR_STEP_DECAY**((epoch - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS)//2)

# Learning-rate scheduler callbacks, one per schedule function.
lr01_cb = tf.keras.callbacks.LearningRateScheduler(lrfn_01, verbose=1)
lr02_cb = tf.keras.callbacks.LearningRateScheduler(lrfn_02, verbose=1)
# Plateau-based LR reduction and early stopping, both watching the validation loss.
rlr_cb = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.2, patience=3, mode='min', verbose=1,
)
ely_cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, mode='min', verbose=1,
)

# Augmentation pipeline: random flips, shift/scale/rotate, brightness/contrast and blur.
augmentor_01 = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.ShiftScaleRotate(
        scale_limit=(0.7, 0.9),
        p=0.5,
        rotate_limit=30,
    ),
    A.RandomBrightnessContrast(
        brightness_limit=(-0.2, 0.2),
        contrast_limit=(-0.2, 0.2),
        p=0.5,
    ),
    A.Blur(p=0.2),
])

class Config:
    """Settings read by the training and submission helpers."""
    # Model and input pipeline.
    MODEL_TYPE = 'xception'
    IMAGE_SIZE = (320, 512)          # (height, width)
    BATCH_SIZE = 32
    AUGMENTOR = augmentor_01         # applied to the training dataset only
    PRE_FUNC = xcp_preprocess_input

    # Single-phase training (used when IS_FINE_TUNING is False).
    N_EPOCHS = 10
    INITIAL_LR = 0.0001

    # Two-phase fine-tuning (used when IS_FINE_TUNING is True).
    IS_FINE_TUNING = False
    FIRST_EPOCHS = 15
    SECOND_EPOCHS = 15
    FIRST_CALLBACKS = [lr01_cb, ely_cb]
    SECOND_CALLBACKS = [lr02_cb, ely_cb]

    # When True, a sample batch is fetched and printed before training.
    DEBUG = True

In [None]:
def train_model(train_df, config=Config):
    """Split ``train_df``, build the model named by ``config`` and train it.

    Args:
        train_df: DataFrame with a 'path' column and the four label columns
            expected by ``get_train_valid``.
        config: Object exposing the settings read below (``MODEL_TYPE``,
            ``IMAGE_SIZE``, ``BATCH_SIZE``, ``AUGMENTOR``, ``PRE_FUNC``,
            ``INITIAL_LR``, ``IS_FINE_TUNING``, epoch counts, callbacks, ``DEBUG``).

    Returns:
        ``(model, history)``. With fine-tuning enabled, ``history`` is the one
        returned by the second (unfrozen) training phase.
    """
    # Split into train/validation paths and labels, then build the two datasets.
    tr_path, val_path, tr_label, val_label = get_train_valid(train_df, valid_size=0.2, random_state=2021)

    tr_ds = Plant_Dataset(tr_path, tr_label, image_size=config.IMAGE_SIZE, batch_size=config.BATCH_SIZE,
                          augmentor=config.AUGMENTOR, shuffle=True, pre_func=config.PRE_FUNC)
    val_ds = Plant_Dataset(val_path, val_label, image_size=config.IMAGE_SIZE, batch_size=config.BATCH_SIZE,
                          augmentor=None, shuffle=False, pre_func=config.PRE_FUNC)
    if config.DEBUG:
        tr_image_batch = next(iter(tr_ds))[0]
        val_image_batch = next(iter(val_ds))[0]
        print(tr_image_batch.shape, val_image_batch.shape)
        print(tr_image_batch[0], val_image_batch[0])

    # Create the model selected by config.MODEL_TYPE.
    print('#######', config.MODEL_TYPE, ' 생성 및 학습 수행 ########')
    model = create_model(model_type=config.MODEL_TYPE, in_shape=(config.IMAGE_SIZE[0], config.IMAGE_SIZE[1], 3), n_classes=4)

    def compile_model():
        # `learning_rate` replaces the deprecated `lr` keyword.
        model.compile(optimizer=Adam(learning_rate=config.INITIAL_LR), loss='categorical_crossentropy', metrics=[AUC()])

    if config.IS_FINE_TUNING:
        # Phase 1: freeze everything except the last four layers and train only those.
        for layer in model.layers[:-4]:
            layer.trainable = False
        # Compile after changing `trainable` so the change is picked up by training.
        compile_model()
        history = model.fit(tr_ds, epochs = config.FIRST_EPOCHS, validation_data=val_ds, callbacks=config.FIRST_CALLBACKS, verbose=1)

        # Phase 2: unfreeze the network. For EfficientNet variants the
        # BatchNormalization layers are left as they were.
        is_efficientnet = 'efficientnet' in config.MODEL_TYPE
        for layer in model.layers:
            if is_efficientnet and isinstance(layer, BatchNormalization):
                continue
            layer.trainable = True
        # Recompile so the newly trainable layers are actually updated.
        compile_model()
        history = model.fit(tr_ds, epochs = config.SECOND_EPOCHS, validation_data=val_ds, callbacks=config.SECOND_CALLBACKS, verbose=1)

    else:
        compile_model()
        history = model.fit(tr_ds, epochs = config.N_EPOCHS, validation_data=val_ds, callbacks=config.FIRST_CALLBACKS, verbose=1)

    return model, history

In [None]:
# Train a model with the settings in Config; keep both the model and its fit history.
xcp_model_02, history = train_model(train_df, config=Config)

In [None]:
def make_submit_df(test_df, model, config=Config):
    """Predict on the test images and return a submission DataFrame.

    Args:
        test_df: DataFrame with 'image_id' and 'path' columns.
        model: Object whose ``predict`` accepts a ``Plant_Dataset`` and returns
            one row of four class scores per image.
        config: Object providing ``IMAGE_SIZE``, ``BATCH_SIZE`` and ``PRE_FUNC``.

    Returns:
        DataFrame with 'image_id' followed by 'healthy', 'multiple_diseases',
        'rust' and 'scab' columns.
    """
    test_path = test_df['path'].values
    # shuffle must stay off so predictions come back in the same order as test_df.
    test_ds = Plant_Dataset(image_filenames=test_path, labels=None, image_size=config.IMAGE_SIZE, batch_size=config.BATCH_SIZE,
                            augmentor=None, shuffle=False, pre_func=config.PRE_FUNC)

    preds = model.predict(test_ds)
    preds_df = pd.DataFrame(preds, columns=['healthy', 'multiple_diseases', 'rust', 'scab'])

    # concat(axis=1) aligns on index; reset it so ids pair with predictions by position
    # even when test_df carries a non-default index.
    image_ids = test_df['image_id'].reset_index(drop=True)
    submit_df = pd.concat([image_ids, preds_df], axis=1)
    return submit_df

In [None]:
# Predict on the test set with the second model and write the submission CSV without the index.
submit_df = make_submit_df(test_df, xcp_model_02, config=Config)
submit_df.to_csv('submit_xcp_02.csv',index=False)