In [None]:
import pandas as pd
import os
import numpy as np
from PIL import Image

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import EfficientNetB3

## Pre-processing Pipeline

In [None]:
def load_image(path, mode='RGB'):
    """Load an image file from disk as a PIL image in the requested mode.

    Args:
        path: Location of the image file to read.
        mode: PIL mode the returned image is converted to (default ``'RGB'``).
            Pass ``None`` to keep the file's native mode.

    Returns:
        A fully loaded ``PIL.Image.Image``; the underlying file is closed
        before the function returns.
    """
    # Image.open is lazy and keeps the file handle open. Converting (or
    # copying) inside the context manager forces the pixel data to be read
    # before the handle is released.
    with Image.open(path) as img:
        return img.convert(mode) if mode else img.copy()


def to_array(image):
    """Return ``image`` as a NumPy array.

    The conversion is delegated to ``np.asarray``, so anything it accepts
    (PIL images, nested lists, existing arrays, ...) can be passed in.
    """
    array = np.asarray(image)
    return array


def to_image(array, mode='RGB'):
    """Build a PIL image from pixel data.

    Args:
        array: Pixel values; they are cast with ``np.uint8`` before conversion.
        mode: PIL mode forwarded to ``Image.fromarray`` (default ``'RGB'``).

    Returns:
        The image produced by ``Image.fromarray``.
    """
    pixels = np.uint8(array)
    return Image.fromarray(pixels, mode=mode)


def resize(image, size):
    """Resize ``image`` to ``size`` using ``tf.image.resize`` defaults.

    Args:
        image: Image data accepted by ``tf.image.resize``.
        size: Target size, forwarded unchanged (``(new_height, new_width)``
            in ``tf.image.resize`` terms).

    Returns:
        The value returned by ``tf.image.resize``.
    """
    resized = tf.image.resize(images=image, size=size)
    return resized

### Split data

In [None]:
# Read the metadata tables for the training and validation splits.
df_train = pd.read_csv('../input/128128-sorghum-cultivar/train_meta.csv')
df_valid = pd.read_csv('../input/128128-sorghum-cultivar/valid_meta.csv')

# Row counts of each split.
print(f"train size: {df_train.shape[0]}")
print(f"valid size: {df_valid.shape[0]}")

# Number of rows per cultivar in each split.
print(df_train['cultivar'].value_counts())
print(df_valid['cultivar'].value_counts())

In [None]:
train_data_dir = '../input/128128-sorghum-cultivar/train/'

# Flag every training file smaller than 1000 bytes as damaged. Each row of
# df_train is unpacked as (file name, label); only the file name is needed.
damage_images = [
    file
    for file, _label in df_train.values
    if os.path.getsize(os.path.join(train_data_dir, file)) < 1000
]

In [None]:
# Wrap the collected file names in a one-column DataFrame (column 'image') and
# display it. Note that the name damage_images is rebound here from a list to
# a DataFrame.
damage_images = pd.DataFrame(damage_images, columns=['image'])
damage_images

In [None]:
# Display the training metadata as loaded, before damaged rows are removed.
df_train

In [None]:
# Index labels of the rows in df_train whose file was flagged as damaged.
# Labels (rather than enumerate() positions) are collected because the rows are
# removed with the label-based DataFrame.drop; positions only coincide with
# labels while df_train still carries its default 0..n-1 index. isin() also
# replaces the per-row linear scan over the damaged-file list.
damage_mask = df_train['image'].isin(damage_images['image'])
damage_indexes = df_train.index[damage_mask].tolist()

In [None]:
# Remove the damaged rows from the training metadata. Rebinding the name
# (instead of dropping in place) together with errors='ignore' keeps this cell
# safe to re-run: labels that were already removed are skipped rather than
# raising KeyError.
df_train = df_train.drop(index=damage_indexes, errors='ignore')

In [None]:
# Display the training metadata again to check the result of the row removal.
df_train

In [None]:
# Draw 50 random rows from the training metadata (no seed is set, so the
# selection differs between runs).
preprocess_fit_mini_batch = df_train.sample(n=50)

# Load each sampled image and resize it to 64x64. `img` is overwritten on every
# iteration, so only the last resized image remains bound after the loop.
for file in preprocess_fit_mini_batch['image'].values:
    image_path = os.path.join('../input/128128-sorghum-cultivar/train/', file)
    img = resize(load_image(image_path), (64, 64))

## Training

In [None]:
# EfficientNetB3 backbone with ImageNet weights and without its classification
# top, declared for 64x64 inputs with 3 channels.
model = EfficientNetB3(include_top=False, input_shape=(64, 64, 3), weights='imagenet')

In [None]:
# Attach a classification head to the backbone: global average pooling,
# 50% dropout, then a 100-way softmax layer.
backbone_features = model.layers[-1].output
x_ = GlobalAveragePooling2D()(backbone_features)
x_ = Dropout(rate=0.5)(x_)
output_layer = Dense(units=100, activation='softmax')(x_)

# Rebind `model` to the full network (backbone input -> softmax output).
# Because the name is reused, this cell expects `model` to still be the bare
# backbone when it runs.
model = Model(inputs=model.input, outputs=output_layer)

In [None]:
# Configure training: Adam with a learning rate of 1e-4, categorical
# cross-entropy loss (matching the softmax output), and accuracy as the
# reported metric.
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss=categorical_crossentropy,
              metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [None]:
# Training-time generator: multiplies pixel values by 1/255 and enables random
# vertical flips as augmentation.
# NOTE(review): the Keras EfficientNet documentation states that these models
# include their own input rescaling and expect pixels in [0, 255] — confirm the
# extra 1/255 rescale is intended. Any change must be mirrored wherever images
# are fed to the model for inference.
train_generator = ImageDataGenerator(rescale=1 / 255.,
                                     vertical_flip=True)

# Validation generator: same rescaling, no augmentation.
valid_generator = ImageDataGenerator(rescale=1 / 255.)

In [None]:
# Fix the label order once, from the training metadata, and hand it to both
# generators. Without an explicit `classes` list each generator infers its own
# sorted label set, so a cultivar missing from one split would silently shift
# the class indices between training and validation. Sorting reproduces the
# order the generator would infer on its own for the training data.
cultivar_classes = sorted(df_train['cultivar'].unique())

# Batches of 32 training images, resized to 64x64, with labels taken from the
# 'cultivar' column.
train_batches = train_generator.flow_from_dataframe(dataframe=df_train,
                                                    directory='../input/128128-sorghum-cultivar/train/',
                                                    x_col='image',
                                                    y_col='cultivar',
                                                    classes=cultivar_classes,
                                                    batch_size=32,
                                                    target_size=(64, 64))

# NOTE(review): validation images are read from the same train/ folder —
# presumably the validation split was carved out of the training images;
# confirm against the dataset layout.
validation_batches = valid_generator.flow_from_dataframe(dataframe=df_valid,
                                                         directory='../input/128128-sorghum-cultivar/train',
                                                         x_col='image',
                                                         y_col='cultivar',
                                                         classes=cultivar_classes,
                                                         batch_size=32,
                                                         target_size=(64, 64))

In [None]:
checkpoint_filepath = 'checkpoint'
# exist_ok=True keeps this cell re-runnable: a plain os.mkdir raises
# FileExistsError once the directory has been created by an earlier run.
os.makedirs(checkpoint_filepath, exist_ok=True)

# Writes the full model (not just weights) to checkpoint/best-checkpoint.hdf5,
# overwriting it only when validation accuracy reaches a new maximum.
model_checkpoint_callback = ModelCheckpoint(filepath=os.path.join(checkpoint_filepath, 'best-checkpoint.hdf5'),
                                            save_weights_only=False,
                                            monitor='val_accuracy',
                                            mode='max',
                                            save_best_only=True)

# Stops training after 15 epochs without improvement in validation loss and
# restores the weights from the best epoch.
early_stop = EarlyStopping(monitor='val_loss',
                           patience=15,
                           restore_best_weights=True)

# Multiplies the learning rate by sqrt(0.1) (about 0.316) after 7 epochs
# without improvement in validation loss.
reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                              factor=np.sqrt(0.1),
                              patience=7)

In [None]:
# Train for up to 50 epochs. The checkpoint callback is included so the best
# model (by validation accuracy) is actually written to disk during training;
# it was previously constructed but never passed to fit().
# NOTE(review): steps_per_epoch and validation_steps are hard-coded — confirm
# they match the number of batches the generators provide.
history = model.fit(x=train_batches,
                    validation_data=validation_batches,
                    epochs=50,
                    steps_per_epoch=3000,
                    validation_steps=460,
                    callbacks=[model_checkpoint_callback, early_stop, reduce_lr])

In [None]:
# Evaluate the trained model on the validation batches; the cell output shows
# the values returned for the compiled loss and accuracy metric.
model.evaluate(validation_batches)

In [None]:
# Convert the per-epoch training history (a dict of metric name -> values)
# into a DataFrame and pickle it to 'history.pkl' for later inspection.
temp_1 = pd.DataFrame(history.history)
temp_1.to_pickle('history.pkl')

In [None]:
# Persist the trained model to 'model.hdf5' in the working directory.
model.save('model.hdf5')

In [None]:
# Show the mapping from cultivar label to class index used by the training
# generator.
train_batches.class_indices

In [None]:
import json

# Serialize the label -> class-index mapping so predictions can be decoded
# later without rebuilding the training generator.
with open('class_indices.json', 'w') as file:
    file.write(json.dumps(train_batches.class_indices))

In [None]:
# Directory listing and label lookup are computed once up front; previously the
# directory was re-listed on every iteration just for the progress counter and
# the label was found by a linear search through class_indices each time.
test_dir = '../input/sorghum-cultivar-identification-512512/test/'
test_files = os.listdir(test_dir)
total_files = len(test_files)

# Inverse of class_indices: class index -> cultivar label.
index_to_label = {index: label for label, index in train_batches.class_indices.items()}

test_preds = []

for i, file in enumerate(test_files):
    # Same preprocessing as the generators: scale by 1/255, resize to 64x64.
    img = resize(
        to_array(load_image(os.path.join(test_dir, file))) / 255.,
        (64, 64))
    # Add the batch dimension expected by model.predict.
    img_arr = np.expand_dims(to_array(img), axis=0)
    # Index of the highest-probability class, as a plain int for dict lookup.
    preds = int(np.argmax(model.predict(img_arr)[0]))

    label = index_to_label[preds]

    test_preds.append([file, label])

    # Single-line progress counter, overwritten in place via '\r'.
    print(f'{i + 1}/{total_files}', end='\r')

In [None]:
# Turn the collected [file name, predicted label] pairs into a two-column
# DataFrame and write it to 'submission_11.csv' without the index column.
# Note that the name test_preds is rebound here from a list to a DataFrame.
test_preds = pd.DataFrame(test_preds, columns=['filename', 'cultivar'])
test_preds.to_csv('submission_11.csv', index=False)

In [None]:
# Display the prediction table that was written to the submission file.
test_preds