## 1. Load libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import os
import cv2
import tensorflow as tf
from tensorflow.keras import layers,models,optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator,load_img, img_to_array, smart_resize
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras.preprocessing import image

## 2. Read CSV and EDA

In [None]:
# Load the training metadata; later cells read its 'image' and 'labels' columns.
df = pd.read_csv('../input/plant-pathology-2021-fgvc8/train.csv')
# Preview the first rows.
df.head()

In [None]:
# Count the rows per label once and reuse the result for both the label
# names and their frequencies (used by the pie chart below).
label_counts = df['labels'].value_counts()
class_name = label_counts.index
class_count = label_counts.values

In [None]:
# Convert the label column to a pandas categorical so integer codes can be derived from it.
df['labels'] = df['labels'].astype('category')

In [None]:
# Store each row's integer category code next to its label.
df['label_num'] = df['labels'].cat.codes

In [None]:
# Preview the frame again, now including the 'label_num' column.
df.head()

In [None]:
# Pie chart of the label distribution computed above.
pie_fig, pie_ax = plt.subplots()
pie_ax.pie(class_count, labels=class_name, autopct='%1.1f%%')
pie_ax.axis('equal')  # equal scaling on both axes
pie_ax.set_title('Proportion of each observed category')
plt.show()

## 3. Define function

> Data-loading function. Note: this function is not used anywhere else in this notebook.

In [None]:
def load_data(df, image_size=None):
    """Load the train and test images into memory as NumPy arrays.

    Args:
        df: DataFrame with an 'image' column (file names) and a 'label_num'
            column (integer class codes).
        image_size: Size passed to ``cv2.resize``. Defaults to the
            notebook-level ``IMAGE_SIZE``.

    Returns:
        A list ``[(train_images, train_labels), (test_images, test_labels)]``.
        Images are float32 RGB arrays. Labels are 1-D int32 arrays; a file
        that has no row in ``df`` (e.g. every test image) gets the label -1.
    """
    if image_size is None:
        image_size = IMAGE_SIZE

    datasets = ['../input/plant-pathology-2021-fgvc8/train_images', '../input/plant-pathology-2021-fgvc8/test_images']

    # Build the file-name -> label lookup once instead of scanning the whole
    # frame for every file; a plain scalar per file also keeps the label
    # array one-dimensional.
    label_by_file = dict(zip(df['image'], df['label_num']))

    output = []

    # Iterate through training and test sets
    for dataset in datasets:
        images = []
        labels = []

        print("Loading {}".format(dataset))
        # Iterate through each image in the folder
        for file in tqdm(os.listdir(dataset)):
            img_path = os.path.join(dataset, file)

            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread returns None instead of raising when a file
                # cannot be decoded; skip it rather than crash in cvtColor.
                print("Skipping unreadable file {}".format(img_path))
                continue

            # OpenCV loads BGR; convert to RGB before resizing
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, image_size)

            images.append(img)
            labels.append(label_by_file.get(file, -1))

        images = np.array(images, dtype='float32')
        labels = np.array(labels, dtype='int32')

        output.append((images, labels))

    return output

## 3. Image display

> In this section, I tried to find a way to improve classification using OpenCV functions, but I could not find one.

In [None]:
# Image size used as target_size by the data generators and as the resize
# size in load_data.
IMAGE_SIZE = (224, 224)
# IMAGE_SIZE = (600, 600)

In [None]:
def display_examples(df):
    """Show a 5x5 grid of randomly chosen training images with their labels.

    Args:
        df: DataFrame with an 'image' column (file names inside the
            train_images directory) and a 'labels' column.
    """
    dataset = '../input/plant-pathology-2021-fgvc8/train_images'
    fig = plt.figure(figsize=(10,10))
    fig.suptitle("Some examples of images of the dataset", fontsize=16)
    for i in range(25):
        plt.subplot(5,5,i+1)
        # The random integer is a row position, so select with iloc; this
        # also works when df has been filtered or re-indexed.
        row = df.iloc[np.random.randint(df.shape[0])]
        img_path = os.path.join(dataset, row['image'])
        img = image.load_img(img_path, target_size=(224, 224))
        x = image.img_to_array(img)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        # Scale to [0, 1] for imshow; no colormap is needed for RGB data.
        plt.imshow(x/255.)
        plt.xlabel(row['labels'])
    plt.show()

In [None]:
def display_examples_canny(df):
    """Show a 5x5 grid of Canny edge maps of randomly chosen training images.

    Args:
        df: DataFrame with an 'image' column (file names inside the
            train_images directory) and a 'labels' column.
    """
    dataset = '../input/plant-pathology-2021-fgvc8/train_images'
    fig = plt.figure(figsize=(10,10))
    fig.suptitle("Some examples of images of the dataset", fontsize=16)
    for i in range(25):
        plt.subplot(5,5,i+1)
        # The random integer is a row position, so select with iloc.
        row = df.iloc[np.random.randint(df.shape[0])]
        img_path = os.path.join(dataset, row['image'])
        bgr = cv2.imread(img_path)
        # cv2.imread yields BGR, so convert straight to grayscale with the
        # BGR code; applying COLOR_BGR2GRAY to an RGB image would swap the
        # red and blue channel weights.
        gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
        # 30 / 200 are the lower / upper hysteresis thresholds.
        edged = cv2.Canny(gray,30,200)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        # The edge map is single-channel; draw it in grayscale.
        plt.imshow(edged, cmap='gray')
        plt.xlabel(row['labels'])
    plt.show()

In [None]:
def display_examples_mog2(df, algo='MOG2'):
    """Show a 5x5 grid of background-subtraction masks of random training images.

    Args:
        df: DataFrame with an 'image' column (file names inside the
            train_images directory) and a 'labels' column.
        algo: 'MOG2' selects cv2.createBackgroundSubtractorMOG2; any other
            value selects cv2.createBackgroundSubtractorKNN.

    NOTE(review): one subtractor instance is applied to 25 unrelated still
    images in turn, so each mask depends on the images drawn before it --
    confirm this is the intended experiment.
    """
    if algo == 'MOG2':
        backSub = cv2.createBackgroundSubtractorMOG2()
    else:
        backSub = cv2.createBackgroundSubtractorKNN()

    dataset = '../input/plant-pathology-2021-fgvc8/train_images'
    fig = plt.figure(figsize=(10,10))
    fig.suptitle("Some examples of images of the dataset", fontsize=16)
    for i in range(25):
        plt.subplot(5,5,i+1)
        # The random integer is a row position, so select with iloc.
        row = df.iloc[np.random.randint(df.shape[0])]
        img_path = os.path.join(dataset, row['image'])
        frame = cv2.imread(img_path)
        fgMask = backSub.apply(frame)
        # The foreground mask has a single channel, so it must be expanded
        # with COLOR_GRAY2RGB; COLOR_BGR2RGB rejects one-channel input.
        mask = cv2.cvtColor(fgMask, cv2.COLOR_GRAY2RGB)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(mask)
        plt.xlabel(row['labels'])
    plt.show()

In [None]:
def display_examples_grabcut(df):
    """Show 25 random training images with the GrabCut background blacked out.

    Each image is resized to 600x600, segmented with cv2.grabCut initialised
    from the rectangle (0, 0, 500, 500), and every pixel GrabCut marks as
    background or probable background is set to zero before plotting.

    Args:
        df: DataFrame with an 'image' column (file names inside the
            train_images directory) and a 'labels' column.
    """
    train_images_dir = '../input/plant-pathology-2021-fgvc8/train_images'
    init_rect = (0, 0, 500, 500)

    canvas = plt.figure(figsize=(10,10))
    canvas.suptitle("Some examples of images of the dataset", fontsize=16)

    for cell in range(1, 26):
        plt.subplot(5, 5, cell)
        picked = np.random.randint(df.shape[0])
        file_path = os.path.join(train_images_dir, df.loc[picked,'image'])

        rgb = cv2.cvtColor(cv2.imread(file_path), cv2.COLOR_BGR2RGB)
        rgb = cv2.resize(rgb, dsize=(600, 600), interpolation=cv2.INTER_AREA)

        # Working buffers required by grabCut: the per-pixel segmentation
        # map and the two internal model arrays.
        segmentation = np.zeros(rgb.shape[:2], np.uint8)
        bg_model = np.zeros((1, 65), np.float64)
        fg_model = np.zeros((1, 65), np.float64)
        cv2.grabCut(rgb, segmentation, init_rect, bg_model, fg_model, 5, cv2.GC_INIT_WITH_RECT)

        # Keep (1) everything not labelled 0 or 2, drop (0) the rest.
        keep = np.where((segmentation != 2) & (segmentation != 0), 1, 0).astype('uint8')
        foreground = rgb * keep[:, :, np.newaxis]

        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(foreground)
        plt.xlabel(df.loc[picked,'labels'])

    plt.show()

In [None]:
# display_examples(df)

In [None]:
# display_examples_canny(df)

In [None]:
# display_examples_mog2(df)

In [None]:
# display_examples_grabcut(df)

## 4. Image data generation

In [None]:
# Augmenting generator for the training data. Pixel values are scaled by
# 1/255 (the original 1/225 was a typo) so that training uses the same
# [0, 1] range as test_datagen and the inference code.
# validation_split reserves 40% of the rows for the 'validation' subset.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255,rotation_range=20,
                                                                width_shift_range=0.2,height_shift_range=0.2,
                                                                shear_range=0.2,zoom_range=0.2,horizontal_flip=True,
                                                                validation_split=0.4)

# Rescale-only generator (no augmentation).
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

In [None]:
# Directory that the file names in df['image'] are resolved against.
train_dir = '../input/plant-pathology-2021-fgvc8/train_images'

In [None]:
# Training batches: augmented images from the 'training' share of the split.
train_generator = train_datagen.flow_from_dataframe(dataframe=df,
                                                    directory=train_dir,
                                                    subset='training',
                                                    x_col="image",
                                                    y_col="labels",
                                                    shuffle=True,
                                                    target_size=IMAGE_SIZE,
                                                    batch_size=64,
                                                    class_mode='categorical')

# Validation batches come from a generator that only rescales, so the
# monitored validation metrics are measured on un-augmented images.
# validation_split must equal the value given to train_datagen: the subsets
# are cut from the dataframe by position, so equal fractions keep the
# training and validation rows disjoint. The rescale factor is taken from
# train_datagen so both subsets are always scaled identically.
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=train_datagen.rescale,
                                                              validation_split=0.4)

val_generator = val_datagen.flow_from_dataframe(dataframe=df,
                                                directory=train_dir,
                                                subset="validation",
                                                x_col="image",
                                                y_col="labels",
                                                shuffle=True,
                                                target_size=IMAGE_SIZE,
                                                batch_size=64,
                                                class_mode='categorical')

In [None]:
def plot_hist(hist):
    """Plot the per-epoch training and validation accuracy curves.

    Args:
        hist: Object whose ``history`` mapping holds the "accuracy" and
            "val_accuracy" series (as returned by the training call below).
    """
    # Plot order matters: it must match the legend entries below.
    for series in ("accuracy", "val_accuracy"):
        plt.plot(hist.history[series])
    plt.title("model accuracy")
    plt.xlabel("epoch")
    plt.ylabel("accuracy")
    plt.legend(["train", "validation"], loc="upper left")
    plt.show()

In [None]:
# Derive the network's input and output sizes from the data pipeline instead
# of hard-coding them, so the model always matches what the generators yield.
input_shape = IMAGE_SIZE + (3,)                    # generator target_size plus 3 colour channels
num_classes = len(train_generator.class_indices)   # one softmax unit per label

# Small baseline CNN: three conv/max-pool stages followed by a dense head.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(64, (3, 3), activation = 'relu', input_shape = input_shape),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64, (3, 3), activation = 'relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64, (3, 3), activation = 'relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation=tf.nn.relu),
    tf.keras.layers.Dense(num_classes, activation=tf.nn.softmax)
])

# categorical_crossentropy matches the one-hot targets produced by
# class_mode='categorical'.
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics=['accuracy'])

In [None]:
def create_callbacks(cpk_path='./best_model.h5', lr_patience=3, stop_patience=7):
    """Build the training callbacks: checkpointing, LR reduction, early stop.

    All three callbacks monitor ``val_accuracy`` (higher is better).

    Args:
        cpk_path: File the best model so far is saved to.
        lr_patience: Epochs without improvement before the learning rate is
            multiplied by 0.1.
        stop_patience: Epochs without improvement before training stops.
            It defaults to a larger value than ``lr_patience``: with equal
            patience (as before) training stopped in the very epoch the
            learning rate was first reduced, so the reduction never had a
            chance to help.

    Returns:
        ``[checkpoint, reducelr, earlystop]``.
    """
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath=cpk_path,
        monitor='val_accuracy',
        mode='max',
        save_best_only=True,
        verbose=1,
    )

    reducelr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_accuracy',
        mode='max',
        factor=0.1,
        patience=lr_patience,
        verbose=0
    )

    earlystop = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        mode='max',
        patience=stop_patience,
        verbose=1
    )

    callbacks = [checkpoint, reducelr, earlystop]

    return callbacks

In [None]:
# Upper bound on the number of epochs; the EarlyStopping callback normally
# ends training sooner.
epochs = 100

# Model.fit accepts generators directly; fit_generator is deprecated in
# TensorFlow 2.x in favour of it and takes the same arguments here.
# Each epoch draws 50 training batches and 20 validation batches.
hist = model.fit(train_generator, epochs=epochs, steps_per_epoch=50,
                 validation_data=val_generator, validation_steps=20,
                 callbacks=create_callbacks())

In [None]:
# Plot the training/validation accuracy curves of the run above.
plot_hist(hist)

In [None]:
# Load the sample submission; its 'image' column is reused for the final submission file.
submission = pd.read_csv("../input/plant-pathology-2021-fgvc8/sample_submission.csv")
# Preview the first rows.
submission.head()

In [None]:
# Directory holding the images to predict on.
test_dir = '../input/plant-pathology-2021-fgvc8/test_images'
# Collects one predicted class index per test image.
pred = []
# Reload the checkpoint written by the ModelCheckpoint callback (the model
# with the best val_accuracy seen during training).
model = models.load_model('./best_model.h5')

In [None]:
# Predict in the order of the submission file so that every prediction lines
# up with the image name it is paired with afterwards (os.listdir returns
# files in arbitrary order). The loop variable is deliberately not called
# `image`, which would overwrite the imported keras `image` module.
pred = []
for file_name in submission.image:
    path = os.path.join(test_dir, file_name)
    # Resize exactly like the training generators (target_size=IMAGE_SIZE);
    # the network only accepts inputs of that size.
    img = load_img(path, target_size=IMAGE_SIZE)
    # Add the leading batch dimension expected by predict.
    batch = np.expand_dims(img_to_array(img), axis=0)
    # Same [0, 1] scaling as the generators.
    probs = model.predict(batch/255.)
    # Appending to a list avoids re-copying the whole array on every
    # iteration, as np.append did.
    pred.append(int(np.argmax(probs)))

In [None]:
# Pair every test image name with its predicted class index.
submission_result = pd.DataFrame({'image' : submission.image, 'labels' : pred})
submission_result['labels'] = submission_result['labels'].astype(int)
# Translate class indices back to label strings with the mapping the training
# generator actually used for the one-hot targets. This is the authoritative
# index -> label relation for the model's outputs, and it avoids sorting
# every row of df just to rebuild the mapping.
class_map = {index: label for label, index in train_generator.class_indices.items()}
submission_result['labels'] = submission_result['labels'].map(class_map)
submission_result.to_csv('submission.csv', index=False)

In [None]:
# Signal that the notebook ran through to the end.
print("Competetion Complete!!")

In [None]:
# inputs = layers.Input(shape=(600, 600, 3))
# model = EfficientNetB7(weights='imagenet', input_tensor=inputs, include_top=False)
# model.trainable = False
# x = layers.GlobalAveragePooling2D(name="avg_pool")(model.output)
# x = layers.Flatten(name="Flatten")(model.output)
# x = layers.Dense(64,activation='relu')(x)
# x = layers.Dense(32,activation='relu')(x)
# x = layers.Dense(16,activation='relu')(x)
# outputs = layers.Dense(12, activation="softmax", name="pred")(x)
# model = models.Model(inputs, outputs, name="EfficientB7")
# optimizer = optimizers.Adam(learning_rate=1e-2)
# model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])

In [None]:
# epochs = 100

# hist = model.fit_generator(train_generator, epochs=epochs, steps_per_epoch=10,
#                            validation_data=val_generator, validation_steps=5, 
#                            callbacks=create_callbacks())

In [None]:
# plot_hist(hist)

> I’m working on it.