In [1]:
import pandas as pd
import os
import shutil
import random
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from PIL import Image
from matplotlib import pyplot as plt
from sklearn.model_selection import StratifiedKFold

In [2]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.layers import AveragePooling2D

from tensorflow.keras.activations import softmax
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

In [3]:
from tensorflow.keras.applications import EfficientNetB0

## Pre-processing Pipeline

In [4]:
def load_image(path, mode='RGB'):
    return Image.open(path)


def to_array(image):
    return np.asarray(image)


def to_image(array, mode='RGB'):
    return Image.fromarray(np.uint8(array), mode=mode)


def resize(image, size):
    return tf.image.resize(image, size)


def resize_smallest_side_different_scales(image, smallest_side_to=(224, 384)):
    height, width = to_array(image).shape[:2]
    scaled_list = []

    if height < width:

        for scale in smallest_side_to:
            scaled = tf.image.resize(image, (scale, width))
            scaled_list.append(scaled)

        return scaled_list

    else:

        for scale in smallest_side_to:
            scaled = tf.image.resize(image, (height, scale))
            scaled_list.append(scaled)

        return scaled_list


def resize_with_aspect_ratio(image, target_width=(128, 256, 512), input_shape=(224, 224)):
    h, w = to_array(image).shape[:2]
    r = h / w
    resized = []

    for width in target_width:
        resized_h = int(r * width)
        resized_img = resize(image, (resized_h, width))
        resized.append(
            to_image(resize(tf.image.resize_with_crop_or_pad(resized_img, input_shape[0], input_shape[1]), (128, 128))))

    return resized


def bounding_boxes(offsets, dim):
    boxes = []

    for i in offsets:
        offset_height, offset_width = i
        target_height, target_width = dim
        boxes.append([offset_height, offset_width, target_height, target_width])

    return boxes


def random_sectioning(image, offsets, dims):
    boxes = bounding_boxes(offsets, dims)
    image_sections = []
    height, width = to_array(image).shape[:2]

    if (height < height // 2 + dims[0]) and (width < width // 2 + dims[1]):
        image = tf.image.resize(image, (dims[0] * 2, dims[1] * 2))

    if (height > height // 2 + dims[0]) and (width < width // 2 + dims[1]):
        image = tf.image.resize(image, (height, dims[1] * 2))

    if (height < height // 2 + dims[0]) and (width > width // 2 + dims[1]):
        image = tf.image.resize(image, (dims[0] * 2, width))

    for box in boxes:
        if random.choice([True, False]):
            section = tf.image.crop_to_bounding_box(image, box[0], box[1], box[2], box[3])
            image_sections.append(resize(section, (128, 128)))

    return image_sections


def aggressive_cropping(image, copies, crop_window, resize_smallest_side=None, output_shape=(128, 128)):
    global resized_copies

    if resize_smallest_side is not None:
        if isinstance(resize_smallest_side, int):
            img = resize(to_array(image), (resize_smallest_side, resize_smallest_side))

        if isinstance(resize_smallest_side, (list, tuple)):
            resized_copies = [tf.image.resize(to_array(image), (size, size)) for size in resize_smallest_side]

    if isinstance(crop_window, int):
        crops = [tf.image.random_crop(to_array(image), (crop_window, crop_window)) for _ in range(copies)]

        return [resize(crop_img, output_shape) for crop_img in crops]

    elif isinstance(crop_window, (list, tuple)):
        crops = [tf.image.random_crop(to_array(image), crop_window) for _ in range(copies)]

        return [resize(crop_img, output_shape) for crop_img in crops]


def change_contrast(image, lower, upper, copies=1):
    copies = [tf.image.random_contrast(image, lower=lower, upper=upper) for _ in range(copies)]
    return copies


def change_brightness(image, delta, copies=1):
    copies = [tf.image.random_brightness(image, max_delta=delta) for _ in range(copies)]
    return copies


def change_hue(image, delta, copies=1):
    copies = [tf.image.random_hue(image, max_delta=delta) for _ in range(copies)]
    return copies


def gamma_transformation(image, gamma=0.3, copies=1):
    low = 1 - gamma
    up = 1 + gamma
    copies = [tf.image.adjust_gamma(image, gamma=np.random.uniform(low, up, 1)) for _ in range(copies)]
    return copies


def change_staturate(image, delta=0.3, copies=1):
    copies = [tf.image.adjust_saturation(image, np.round(np.random.uniform(-1 * delta, 1 * delta), 2)) for _ in
              range(copies)]
    return copies


def change_sharpness(image, factor=0.3, copies=1):
    results = []

    for _ in range(copies):
        change_by = float(np.round(np.random.uniform(0, factor)))
        results.append(tfa.image.sharpness(image, change_by))

    return results


def apply_blur(image, sigma_range=2, copies=1):
    result = []

    for _ in range(copies):
        kernel_size = np.random.randint(1, 7)
        sigma = np.random.uniform(1, sigma_range)
        result.append(tfa.image.gaussian_filter2d(image, kernel_size, sigma))

    return result

In [5]:
# if not os.path.isdir('train'):
#     os.mkdir('train')

# for i, file in enumerate(os.listdir('../input/128-128-sorghum-cultivar/train/')):
#     src = os.path.join('../input/128-128-sorghum-cultivar/train/', file)
#     dst = os.path.join('train', file)

#     shutil.copyfile(src, dst)

#     print(f'{i}/{len(os.listdir("../input/128-128-sorghum-cultivar/train"))}', end='\r')

### Split data

In [6]:
df_train = pd.read_csv('../input/128128-sorghum-cultivar/train_meta.csv')
df_valid = pd.read_csv('../input/128128-sorghum-cultivar/valid_meta.csv')

print(f"train size: {len(df_train)}")
print(f"valid size: {len(df_valid)}")

print(df_train.cultivar.value_counts())
print(df_valid.cultivar.value_counts())

train size: 81453
valid size: 13144
PI_156393    1139
PI_155760    1061
PI_157030    1042
PI_152771    1022
PI_154844    1016
             ... 
PI_213900     471
PI_257600     470
PI_155885     463
PI_195754     461
PI_152591     443
Name: cultivar, Length: 100, dtype: int64
PI_156393    185
PI_155760    177
PI_152771    166
PI_157030    166
PI_22913     161
            ... 
PI_213900     76
PI_152971     76
PI_181080     76
PI_257600     75
PI_152591     74
Name: cultivar, Length: 100, dtype: int64


In [7]:
train_data_dir = '../input/128128-sorghum-cultivar/train/'
damage_images = []

for file, label in df_train.values:
    if os.path.getsize(os.path.join(train_data_dir, file))//1000<1:
        damage_images.append(file)

In [8]:
damage_images = pd.DataFrame(damage_images, columns=['image'])
damage_images

Unnamed: 0,image
0,cm-pp0-2017-06-15__14-08-45-158.png
1,cm-pp2-2017-06-15__14-08-45-158.png
2,cm-pp1-2017-06-20__12-22-46-212.png
3,cm-pp2-2017-06-20__12-22-46-212.png
4,cm-pp2-2017-06-01__12-05-25-739.png
...,...
6215,cm-1-cm-pp0-2017-06-03__13-08-25-295.png
6216,cm-1-cm-pp2-2017-06-14__11-55-57-169.png
6217,cm-1-cm-pp0-2017-06-22__13-39-34-523.png
6218,cm-1-cm-pp0-2017-06-21__13-50-39-440.png


In [9]:
df_train

Unnamed: 0,image,cultivar
0,pp2-2017-06-16__12-24-20-930.png,PI_257599
1,pp0-2017-06-02__16-48-57-866.png,PI_154987
2,pp1-2017-06-02__16-48-57-866.png,PI_154987
3,pp2-2017-06-02__16-48-57-866.png,PI_154987
4,pp1-2017-06-12__13-18-07-707.png,PI_92270
...,...,...
81448,cm-1-pp2-2017-06-15__12-45-36-309.png,PI_329338
81449,cm-1-pp1-2017-06-16__12-26-02-633.png,PI_329300
81450,cm-1-cm-pp2-2017-06-04__14-15-33-761.png,PI_157035
81451,cm-1-pp1-2017-06-23__18-42-11-656.png,PI_156268


In [10]:
damage_indexes = []

for i, (file, label) in enumerate(df_train.values):
    if file in damage_images.image.values:
        damage_indexes.append(i)

In [11]:
df_train.drop(damage_indexes, inplace=True)

In [12]:
df_train

Unnamed: 0,image,cultivar
0,pp2-2017-06-16__12-24-20-930.png,PI_257599
1,pp0-2017-06-02__16-48-57-866.png,PI_154987
2,pp1-2017-06-02__16-48-57-866.png,PI_154987
3,pp2-2017-06-02__16-48-57-866.png,PI_154987
4,pp1-2017-06-12__13-18-07-707.png,PI_92270
...,...,...
81448,cm-1-pp2-2017-06-15__12-45-36-309.png,PI_329338
81449,cm-1-pp1-2017-06-16__12-26-02-633.png,PI_329300
81450,cm-1-cm-pp2-2017-06-04__14-15-33-761.png,PI_157035
81451,cm-1-pp1-2017-06-23__18-42-11-656.png,PI_156268


## Apply Augmentations

In [13]:
# c = 0
# new_train_meta = []
# train_batch = df_train.shape[0]

# for filename, label in df_train.values:
#     if filename in os.listdir('train'):
#         if random.choice([True, False]):
#             image = tf.keras.utils.img_to_array(Image.open(os.path.join('train', filename)))
#             process = change_brightness(image, 0.4, copies=1)[0]

#             if random.choice([True, False]):
#                 process = change_contrast(process, 0.5, 2, copies=1)[0]

#             if random.choice([True, False]):
#                 process = change_hue(process, 0.2, copies=1)[0]

#             if random.choice([True, False]):
#                 process = gamma_transformation(process, 0.3, copies=1)[0]

#             if random.choice([True, False]):
#                 process = change_staturate(process, 0.3, copies=1)[0]

#             dst_file = f'cm-{filename}'
#             tf.keras.utils.array_to_img(tf.image.resize(process, (96, 96))).save(f'train/{dst_file}')
#             new_train_meta.append([dst_file, label])
            
#             del image

#         c += 1
#         print(f'{c}/{train_batch}', end='\r')

In [14]:
# train_df_ = pd.DataFrame(new_train_meta, columns=['image', 'cultivar'])
# train_df_

In [15]:
# del new_train_meta

In [16]:
# c = 0
# new_train_meta_1 = []
# combine_df = pd.concat([df_train, train_df_], ignore_index=True)
# train_batch = combine_df.shape[0]

# for filename, label in combine_df.values:
#     if filename in os.listdir('train'):
#         if random.choice([True, False]):
#             p = False
#             image = tf.keras.utils.img_to_array(Image.open(os.path.join('train', filename)))

#             if random.choice([True, False]):
#                 process = change_sharpness(image, 0.5, copies=1)[0]
#                 p = True

#             if random.choice([True, False]):
#                 process = apply_blur(image, 2.5, copies=1)[0]
#                 p = True

#             if p:
#                 dst_file = f'cm-1-{filename}'
#                 tf.keras.utils.array_to_img(tf.image.resize(process, (96, 96))).save(f'train/{dst_file}')
#                 new_train_meta_1.append([dst_file, label])
                
#             del image

#         c += 1
#         print(f'{c}/{train_batch}', end='\r')

In [17]:
# train_df_1 = pd.DataFrame(new_train_meta_1, columns=['image', 'cultivar'])
# train_df_1

In [18]:
# del new_train_meta_1

In [19]:
# df_train = pd.concat([df_train, train_df_, train_df_1], ignore_index=True)
# df_train

In [20]:
model = EfficientNetB0(include_top=False, input_shape=(96, 96, 3))

2022-05-08 07:09:04.186127: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-08 07:09:04.276064: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-08 07:09:04.276968: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-08 07:09:04.278561: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5


In [21]:
model.summary()

Model: "efficientnetb0"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 96, 96, 3)]  0                                            
__________________________________________________________________________________________________
rescaling (Rescaling)           (None, 96, 96, 3)    0           input_1[0][0]                    
__________________________________________________________________________________________________
normalization (Normalization)   (None, 96, 96, 3)    7           rescaling[0][0]                  
__________________________________________________________________________________________________
stem_conv_pad (ZeroPadding2D)   (None, 97, 97, 3)    0           normalization[0][0]              
_____________________________________________________________________________________

In [22]:
# initializer = HeUniform()

x_ = AveragePooling2D(pool_size=(3, 3))(model.layers[-1].output)
x_ = Flatten()(x_)
x_ = Dropout(0.5)(x_)
output_layer = Dense(units=100, activation=softmax)(x_)

model = Model(model.input, output_layer)

In [23]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 96, 96, 3)]  0                                            
__________________________________________________________________________________________________
rescaling (Rescaling)           (None, 96, 96, 3)    0           input_1[0][0]                    
__________________________________________________________________________________________________
normalization (Normalization)   (None, 96, 96, 3)    7           rescaling[0][0]                  
__________________________________________________________________________________________________
stem_conv_pad (ZeroPadding2D)   (None, 97, 97, 3)    0           normalization[0][0]              
______________________________________________________________________________________________

In [24]:
model.compile(optimizer=Adam(learning_rate=0.001),
                      loss=categorical_crossentropy,
                      metrics=['accuracy'])

In [25]:
train_generator = ImageDataGenerator(rescale=1 / 255.,
                                     width_shift_range=0.3,
                                     height_shift_range=0.3,
                                     samplewise_center=True,
                                     samplewise_std_normalization=True,
                                     vertical_flip=True)

valid_generator = ImageDataGenerator(rescale=1 / 255.)

In [26]:
train_batches = train_generator.flow_from_dataframe(dataframe=df_train,
                                                    directory='../input/128128-sorghum-cultivar/train/',
                                                    x_col='image',
                                                    y_col='cultivar',
                                                    batch_size=32,
                                                    target_size=(96, 96))

validation_batches = valid_generator.flow_from_dataframe(dataframe=df_valid,
                                                         directory='../input/128128-sorghum-cultivar/train',
                                                         x_col='image',
                                                         y_col='cultivar',
                                                         batch_size=32,
                                                         target_size=(96, 96))

Found 75233 validated image filenames belonging to 100 classes.
Found 13144 validated image filenames belonging to 100 classes.


In [27]:
checkpoint_filepath = 'checkpoint'
os.mkdir(checkpoint_filepath)

model_checkpoint_callback = ModelCheckpoint(filepath=checkpoint_filepath,
                                            save_weights_only=True,
                                            monitor='val_accuracy',
                                            mode='max',
                                            save_best_only=True)

early_stop = EarlyStopping(monitor='val_loss',
                           patience=20,
                           restore_best_weights=True)

reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                              factor=np.sqrt(0.1),
                              patience=8)

In [28]:
history = model.fit(x=train_batches,
                    validation_data=validation_batches,
                    epochs=100,
                    steps_per_epoch=1600,
                    callbacks=[early_stop, reduce_lr])

2022-05-08 07:09:45.230537: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/100


2022-05-08 07:09:53.515045: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100


In [29]:
model.evaluate(validation_batches)



[4.944950103759766, 0.012477176263928413]

In [30]:
temp_1 = pd.DataFrame(history.history)
temp_1.to_pickle('history.pkl')

In [31]:
model.save('model.hdf5')



In [32]:
train_batches.class_indices

{'PI_144134': 0,
 'PI_145619': 1,
 'PI_145626': 2,
 'PI_145633': 3,
 'PI_146890': 4,
 'PI_152591': 5,
 'PI_152651': 6,
 'PI_152694': 7,
 'PI_152727': 8,
 'PI_152728': 9,
 'PI_152730': 10,
 'PI_152733': 11,
 'PI_152751': 12,
 'PI_152771': 13,
 'PI_152816': 14,
 'PI_152828': 15,
 'PI_152860': 16,
 'PI_152862': 17,
 'PI_152923': 18,
 'PI_152961': 19,
 'PI_152965': 20,
 'PI_152966': 21,
 'PI_152967': 22,
 'PI_152971': 23,
 'PI_153877': 24,
 'PI_154750': 25,
 'PI_154844': 26,
 'PI_154846': 27,
 'PI_154944': 28,
 'PI_154987': 29,
 'PI_154988': 30,
 'PI_155516': 31,
 'PI_155760': 32,
 'PI_155885': 33,
 'PI_156178': 34,
 'PI_156217': 35,
 'PI_156268': 36,
 'PI_156326': 37,
 'PI_156330': 38,
 'PI_156393': 39,
 'PI_156463': 40,
 'PI_156487': 41,
 'PI_156871': 42,
 'PI_156890': 43,
 'PI_157030': 44,
 'PI_157035': 45,
 'PI_157804': 46,
 'PI_167093': 47,
 'PI_170787': 48,
 'PI_175919': 49,
 'PI_176766': 50,
 'PI_179749': 51,
 'PI_180348': 52,
 'PI_181080': 53,
 'PI_181083': 54,
 'PI_195754': 55,
 '

In [33]:
import json

with open('class_indices.json', 'w') as file:
    json.dump(train_batches.class_indices, file)

In [34]:
test_preds = []

for i, file in enumerate(os.listdir('../input/sorghum-cultivar-identification-512512/test/')):
    img = resize(
        to_array(load_image(os.path.join('../input/sorghum-cultivar-identification-512512/test/', file))) / 255.,
        (96, 96))
    img_arr = np.expand_dims(to_array(img), axis=0)
    preds = np.argmax(model.predict(img_arr)[0])

    label = list(train_batches.class_indices.keys())[list(train_batches.class_indices.values()).index(preds)]

    test_preds.append([file, label])

    print(f'{i + 1}/{len(os.listdir("../input/sorghum-cultivar-identification-512512/test/"))}', end='\r')

23639/23639

In [35]:
test_preds = pd.DataFrame(test_preds, columns=['filename', 'cultivar'])
test_preds.to_csv('submission_10.csv', index=False)

In [36]:
test_preds

Unnamed: 0,filename,cultivar
0,2010119363.png,PI_22913
1,1751085827.png,PI_154844
2,1369310450.png,PI_22913
3,1462167466.png,PI_22913
4,1178731942.png,PI_255239
...,...,...
23634,1760658237.png,PI_22913
23635,1880599754.png,PI_154844
23636,1693645524.png,PI_22913
23637,164830833.png,PI_22913
