In [1]:
import numpy as np
import pandas as pd
from PIL import Image
import os
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.utils import shuffle
from sklearn.utils import class_weight
from sklearn.preprocessing import minmax_scale
import random
import cv2
from imgaug import augmenters as iaa
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Dropout, Activation, Input, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.experimental import CosineDecay
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.layers.experimental.preprocessing import RandomCrop,CenterCrop, RandomRotation

  data = yaml.load(f.read()) or {}


In [2]:
# Show the GPU devices TensorFlow can see (the cell output is the returned list).
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [9]:
# Shared configuration read by build_graph() further down.
# batch_size = 4
image_size = 512  # side length of the square crop used as model input
input_shape = (image_size, image_size, 3)  # shape handed to Input() and EfficientNetB3
dropout_rate = 0.4  # used both for the Dropout layer and as EfficientNet's drop_connect_rate
# classes_to_predict = sorted(training_df.label.unique())
n_ouput = 5  # number of units in the final softmax layer; NOTE(review): name looks like a typo of "n_output"

In [22]:
def build_graph(weights=None):
    '''
    Build the EfficientNetB3-based classifier and optionally restore saved weights.

    Architecture: augmentation layers -> EfficientNetB3 (no top) ->
    global average pooling -> dropout -> softmax Dense with ``n_ouput`` units.
    Reads the module-level ``image_size``, ``input_shape``, ``dropout_rate``
    and ``n_ouput`` settings.

    Args:
        weights: path forwarded to ``model.load_weights``; when None the
            backbone keeps the Keras default ImageNet initialisation and no
            checkpoint is loaded.

    Returns:
        The (uncompiled) ``tf.keras`` Model.
    '''
    data_augmentation_layers = tf.keras.Sequential(
        [
            layers.experimental.preprocessing.RandomCrop(height=image_size, width=image_size),
            layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
            layers.experimental.preprocessing.RandomRotation(0.25),
            layers.experimental.preprocessing.RandomZoom((-0.2, 0)),
            layers.experimental.preprocessing.RandomContrast((0.2,0.2))
        ]
    )

    # EfficientNetB3 defaults to weights="imagenet". When a checkpoint is
    # about to overwrite every weight anyway, skip that download/initialisation;
    # without a checkpoint keep the original ImageNet default.
    backbone = EfficientNetB3(weights="imagenet" if weights is None else None,
                              include_top=False,
                              input_shape=input_shape,
                              drop_connect_rate=dropout_rate)

    inputs = Input(shape=input_shape)
    augmented = data_augmentation_layers(inputs)
    features = backbone(augmented)
    pooling = layers.GlobalAveragePooling2D()(features)
    dropout = layers.Dropout(dropout_rate)(pooling)
    outputs = Dense(n_ouput, activation="softmax")(dropout)
    model = Model(inputs=inputs, outputs=outputs)

    # Identity comparison is the correct None check.
    if weights is not None:
        model.load_weights(weights)

    return model

In [12]:
test_folder = '../input/cassava-leaf-disease-classification/test_images/'
# Columns are passed as a list: a set has no defined order, so the column
# order of the frame (and of the written CSV) was arbitrary.
submission_df = pd.DataFrame(columns=["image_id", "label"])
# os.listdir returns entries in arbitrary order; sort for a reproducible row order.
submission_df["image_id"] = sorted(os.listdir(test_folder))
# Placeholder label, overwritten once predictions have been computed.
submission_df["label"] = 0

In [52]:
# Random augmentations applied to each crop at prediction time
# (flip, zoom, contrast), assembled one layer at a time.
test_time_augmentation_layers = tf.keras.Sequential()
test_time_augmentation_layers.add(
    layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical")
)
test_time_augmentation_layers.add(
    layers.experimental.preprocessing.RandomZoom((-0.2, 0))
)
test_time_augmentation_layers.add(
    layers.experimental.preprocessing.RandomContrast((0.2,0.2))
)

def scan_over_image(img_path, crop_size=512):
    '''
    Extract the four corner crops of an image.

    Returns crops of ``crop_size`` x ``crop_size`` pixels anchored at the
    top-left, top-right, bottom-left and bottom-right corners (in that
    order). The crops overlap, and together cover the whole image, only
    when neither image side exceeds ``2 * crop_size``.

    Args:
        img_path: path of the image file to read.
        crop_size: side length in pixels of each square crop.

    Returns:
        Array of shape (4, crop_size, crop_size, 3).

    Raises:
        ValueError: if the image is smaller than ``crop_size`` on either side.
    '''
    # Close the file handle as soon as the pixels are copied out; convert to
    # RGB so greyscale / RGBA files still yield a 3-channel array.
    with Image.open(img_path) as img:
        pixels = np.array(img.convert("RGB"))

    # Take the dimensions from the array itself: it is laid out as
    # (rows, columns, channels), whereas PIL's ``size`` is (width, height).
    img_height, img_width = pixels.shape[:2]
    if img_height < crop_size or img_width < crop_size:
        raise ValueError(
            f"image {img_path!r} is {img_width}x{img_height}, "
            f"smaller than the requested crop size {crop_size}"
        )

    row_origins = [0, img_height - crop_size]
    col_origins = [0, img_width - crop_size]
    crops = [
        pixels[row:row + crop_size, col:col + crop_size, :]
        for row in row_origins
        for col in col_origins
    ]

    return np.array(crops)

def predict_and_vote(models, image_filename, folder, TTA_runs=4):
    '''
    Predict the class of one image with an ensemble and test-time augmentation.

    The image is split into local crops by ``scan_over_image``. Each crop is
    duplicated ``TTA_runs`` times, randomly augmented, and scored by every
    model. All scores (over models, augmentations and crops) are summed and
    the index of the largest total is returned.

    Args:
        models: non-empty sequence of models exposing ``predict``.
        image_filename: file name of the image inside ``folder``.
        folder: directory containing the image.
        TTA_runs: number of augmented copies scored per crop.

    Returns:
        Index of the class with the highest summed score.
    '''

    # os.path.join works whether or not `folder` ends with a separator.
    local_image_list = scan_over_image(os.path.join(folder, image_filename))

    # apply TTA to each local image and sum all predictions for that image
    localised_predictions = []
    for local_image in local_image_list:
        duplicated_local_image = tf.convert_to_tensor(
            np.repeat(local_image[np.newaxis, ...], TTA_runs, axis=0)
        )
        # Random preprocessing layers are only guaranteed to be active in
        # training mode; request it explicitly so the copies really differ.
        augmented_images = test_time_augmentation_layers(duplicated_local_image, training=True)

        predictions = models[0].predict(augmented_images)
        for m in models[1:]:
            predictions = predictions + m.predict(augmented_images)

        localised_predictions.append(np.sum(predictions, axis=0))

    # sum predictions from all local images and retrieve the index of the highest value
    global_predictions = np.sum(np.array(localised_predictions), axis=0)
    final_prediction = np.argmax(global_predictions)

    return final_prediction

def run_predictions_over_image_list(models, image_list, folder):
    '''
    Run ``predict_and_vote`` on every image and collect the predicted labels.

    Args:
        models: models forwarded to ``predict_and_vote``.
        image_list: sized iterable of image file names.
        folder: directory containing the images.

    Returns:
        List with one prediction per entry of ``image_list``, in the same order.
    '''
    # Iterating over tqdm advances the bar after each image has been
    # processed, instead of ticking before the work starts.
    return [
        predict_and_vote(models, image_filename, folder)
        for image_filename in tqdm(image_list, total=len(image_list))
    ]

In [53]:
# Directory scanned for saved weights; every entry in it becomes a model path.
model_folder = '../model/keras-efficientnet/'
model_paths = list(map(lambda entry: f'{model_folder}{entry}', os.listdir(model_folder)))

In [54]:
# Build one model per path in model_paths, loading that path's weights.
models = [build_graph(weights_path) for weights_path in model_paths]

In [55]:
# Predict every listed test image and store the results in the "label" column.
submission_df["label"] = run_predictions_over_image_list(models, submission_df["image_id"], test_folder)

100%|██████████| 1/1 [00:04<00:00,  4.80s/it]


In [56]:
# Write the submission table to submission.csv, leaving out the index column.
submission_df.to_csv("submission.csv", index=False)

In [57]:
# Display the final submission table as the cell output.
submission_df

Unnamed: 0,label,image_id
0,4,2216849948.jpg
