In [None]:
# General Libs
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, BatchNormalization, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
import numpy as np
import random
import matplotlib.pyplot as plt
%matplotlib inline
tf.__version__

# Helper functions

## walk_on_dir

In [None]:
import os

def walk_on_dir(dir):
    """Print a summary of a directory tree, one entry per leaf directory.

    Walks ``dir`` top-down with ``os.walk``. For every directory that contains
    sub-directories, the number of sub-directories and their names are printed.
    For every directory without sub-directories (a leaf), its path, its own
    name and the list of files it contains are printed. The number of leaf
    directories found is printed last.

    Args:
        dir: Root directory to walk. The name shadows the ``dir`` builtin but
            is kept so existing keyword callers keep working.

    Returns:
        None. Everything is written to stdout.
    """
    print(dir)
    leaf_count = 0
    for path, dirs, files in os.walk(dir):
        if dirs:
            print(len(dirs))
            print(dirs)
        else:
            print(path)
            # Derive the leaf name from the path itself instead of indexing the
            # last seen `dirs` list: that list does not exist when the root has
            # no sub-directories, and goes out of sync on nested trees.
            print(os.path.basename(os.path.normpath(path)))
            print(files)
            leaf_count += 1
    print(leaf_count)

## train_model

In [None]:
MAX_EPOCHS = 30  # upper bound on the number of training epochs
PATIENCE = 4     # epochs without val_loss improvement tolerated before stopping

def train_model(model_id, model, train_generator, val_generator):
    """Fit ``model`` with best-model checkpointing and early stopping.

    Args:
        model_id: File path where the best model (lowest ``val_loss``) is saved.
        model: Compiled Keras model to train.
        train_generator: Keras batch iterator yielding training batches.
        val_generator: Keras batch iterator yielding validation batches.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # Save the best model seen so far (lowest validation loss)
    cb_save_best_model = keras.callbacks.ModelCheckpoint(filepath=model_id,
                                                         monitor='val_loss',
                                                         save_best_only=True,
                                                         verbose=1)

    # Stop training early when the validation loss stops improving
    cb_early_stop = keras.callbacks.EarlyStopping(monitor='val_loss',
                                                  patience=PATIENCE,
                                                  verbose=1)

    # Take the number of steps from the generators themselves. len() of a Keras
    # batch iterator is the number of batches per pass, including the final
    # partial batch. The previous `samples // BATCH_SIZE` relied on a global
    # defined in a later cell, silently skipped the tail of the data (always
    # the same validation samples when shuffle=False), and evaluated to 0 steps
    # for sets smaller than one batch.
    history = model.fit(
            train_generator,
            steps_per_epoch=len(train_generator),
            epochs=MAX_EPOCHS,
            callbacks=[cb_save_best_model, cb_early_stop],
            validation_data=val_generator,
            verbose=1,
            validation_steps=len(val_generator))

    return history

## print_metrics

In [None]:
def print_metrics(model, dataset_generator):
    """Evaluate ``model`` on ``dataset_generator`` and print the resulting scores.

    Args:
        model: Object exposing ``evaluate(generator)`` that returns a sequence
            whose first three items are printed as loss, accuracy and ROC AUC
            (the order of the metrics given at compile time).
        dataset_generator: Data to evaluate on.

    Returns:
        None. The three scores are printed to stdout.
    """
    # Evaluate the generator that was passed in; the previous version ignored
    # the argument and always read the notebook-global `test_generator`.
    score = model.evaluate(dataset_generator)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    print('Test ROC AUC:', score[2])

## plot_training_curves

In [None]:
def plot_training_curves(history):
    """Plot loss and accuracy curves for training and validation.

    Draws a two-row figure: the top panel shows ``loss`` / ``val_loss`` and the
    bottom panel shows ``accuracy`` / ``val_accuracy``, both against the epoch
    number (starting at 1).

    Args:
        history: Object whose ``history`` attribute is a mapping with the keys
            ``loss``, ``val_loss``, ``accuracy`` and ``val_accuracy``.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    import matplotlib.pyplot as plt

    recorded = history.history
    train_loss, valid_loss = recorded['loss'], recorded['val_loss']
    epoch_axis = range(1, len(train_loss) + 1)

    def draw_pair(train_series, valid_series, train_label, valid_label):
        # Training values as dots, validation values as a line, same colour.
        plt.plot(epoch_axis, train_series, 'bo', label=train_label)
        plt.plot(epoch_axis, valid_series, 'b', label=valid_label)

    def label_axes(y_label):
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()

    plt.figure(figsize=(10,10))

    # Top panel: loss (its title doubles as the title of the whole figure)
    plt.subplot(2,1,1)
    draw_pair(train_loss, valid_loss, 'Training loss', 'Validation loss')
    plt.title('Training and validation Loss and Accuracy')
    label_axes('Loss')

    # Bottom panel: accuracy
    plt.subplot(2,1,2)
    draw_pair(recorded['accuracy'], recorded['val_accuracy'],
              'Training acc', 'Validation acc')
    label_axes('Acc')

    plt.show()

## plot_confusion_matrix_and_classification_report

In [None]:
import itertools

#Plot the confusion matrix. Set Normalize = True/False
def plot_confusion_matrix(cm, classes, normalize=True, title='Confusion matrix', cmap=plt.cm.Blues):
    """Plot a confusion matrix as an annotated heat map.

    Args:
        cm: Square confusion matrix (rows = true labels, columns = predicted
            labels), as an array or anything ``np.asarray`` accepts.
        classes: Class names used as tick labels on both axes.
        normalize: When True, every row is divided by its sum and rounded to
            two decimals before plotting. Rows that sum to zero are shown as
            zeros.
        title: Figure title.
        cmap: Matplotlib colormap used for the heat map.

    Returns:
        None. The figure is drawn on a new matplotlib figure.
    """
    cm = np.asarray(cm)
    if normalize:
        # Normalise BEFORE drawing so that the colours, the text threshold and
        # the printed numbers all refer to the same values.
        row_totals = cm.sum(axis=1, keepdims=True)
        # Rows without samples would divide by zero; keep them at 0 instead of NaN.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        cm = np.around(cm, decimals=2)

    plt.figure(figsize=(15,15))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels are taken into account.
    plt.tight_layout()

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

def plot_confusion_matrix_and_classification_report(model, dataset_generator, classes):
    """Plot the confusion matrix and print the classification report.

    Args:
        model: Model whose ``predict(dataset_generator)`` returns one row of
            class scores per sample.
        dataset_generator: Generator providing the samples and, through its
            ``classes`` attribute, the true class index of every sample.
            NOTE(review): assumes the generator does not shuffle, so that
            ``classes`` lines up with the prediction order - confirm at the
            call site.
        classes: Class names, ordered by class index.

    Returns:
        None. The matrix is plotted and the report is printed.
    """
    class_scores = model.predict(dataset_generator)
    y_pred = np.argmax(class_scores, axis=1)

    # Ground truth must come from the generator that was predicted on; the
    # previous version always read the notebook-global `test_generator`.
    y_true = dataset_generator.classes

    # Pin the label ids so the matrix and the report always have one entry per
    # class name, even if some class is absent from this particular dataset.
    label_ids = list(range(len(classes)))

    # Confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    plot_confusion_matrix(cm, classes, normalize=False, title='Confusion Matrix')

    # Classification report
    print('Classification Report')
    print(classification_report(y_true, y_pred, labels=label_ids, target_names=classes))

# Dataset 300 Bird Species
* Fonte: https://www.kaggle.com/gpiosenka/100-bird-species


O dataset contém 45.622 imagens de pássaros de 300 espécies diferentes já distribuídas entre treino, validação e teste.

As imagens possuem 224 x 224 pixels com 3 canais de cores RGB.

# Exploratory Analysis

In [None]:
# The dataset images are 224 x 224 RGB, which is also the standard ResNet50
# input size. The previous value (244, 244) forced every image through a
# needless resize.
IMG_SHAPE = (224,224)
INPUT_SHAPE = (IMG_SHAPE[0], IMG_SHAPE[1], 3)  # height, width, channels

# Dataset splits, as laid out in the Kaggle input directory
TRAIN_DIR = '../input/100-bird-species/train'
TEST_DIR = '../input/100-bird-species/test'
VAL_DIR = '../input/100-bird-species/valid'

BATCH_SIZE = 16

# Seed shared by the class sub-sampling and the data generators
RANDOM_SEED = 33

# Number of classes kept for the experiment
MAX_CLASSES = 30

In [None]:
# One sub-directory per class. os.listdir returns entries in arbitrary,
# filesystem-dependent order, so sort them: otherwise a seeded shuffle of this
# list is not reproducible from one machine to another. Stray files are skipped.
classes = sorted(
    entry for entry in os.listdir(TRAIN_DIR)
    if os.path.isdir(os.path.join(TRAIN_DIR, entry))
)
num_classes = len(classes)
num_classes

In [None]:
# For experimentation, randomly pick N classes (N = MAX_CLASSES).
# NOTE(review): this cell shuffles `classes` in place and then rebinds it to the
# truncated list, so running it a second time reshuffles the already-truncated
# list and changes the class order. Re-run the cell that builds `classes` first.

import random

# Dedicated RNG seeded with RANDOM_SEED; the global `random` state is not touched.
random.Random(RANDOM_SEED).shuffle(classes)

# Keep the first MAX_CLASSES entries of the shuffled list.
classes = classes[:MAX_CLASSES]
num_classes = len(classes)

classes

In [None]:
# Visualizing some sample data (raw images, no pre-processing)

sample_generator = ImageDataGenerator().flow_from_directory(TEST_DIR, shuffle=False)

plt.figure(figsize=(15,15))
for i in range(9):
    # 3x3 grid
    plt.subplot(3, 3, i + 1)
    # The built-in next() replaces the iterator's legacy .next() method.
    # Element 0 of the yielded pair is the image batch.
    batch = next(sample_generator)[0]
    # Named `sample_image` so the `image` module imported from
    # tensorflow.keras.preprocessing is not shadowed.
    sample_image = batch[0].astype('uint8')
    plt.imshow(sample_image)
plt.show()

# Pre-processing

In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input

In [None]:
# Keyword arguments shared by every ImageDataGenerator
generator_args = dict(preprocessing_function=preprocess_input)

# Keyword arguments shared by every flow_from_directory call
flow_args = dict(
    target_size=IMG_SHAPE,
    seed=RANDOM_SEED,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    classes=classes,
)

In [None]:
# Plain generator: only the shared pre-processing is applied
img_generator = ImageDataGenerator(**generator_args)

# Augmenting generator: the shared pre-processing plus random geometric transforms
augmented_img_generator = ImageDataGenerator(
    **generator_args,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)

In [None]:
# Training data is augmented and shuffled. Test and validation data are read in
# directory order without augmentation.
train_generator = augmented_img_generator.flow_from_directory(
    directory=TRAIN_DIR, shuffle=True, **flow_args)
test_generator = img_generator.flow_from_directory(
    directory=TEST_DIR, shuffle=False, **flow_args)
val_generator = img_generator.flow_from_directory(
    directory=VAL_DIR, shuffle=False, **flow_args)

In [None]:
# Visualizing some pre-processed (augmented) training examples

# ResNet50's preprocess_input converts RGB to BGR and subtracts the ImageNet
# per-channel means, so the tensors contain negative values. Casting them
# straight to uint8 wraps around and displays garbage. Undo the normalisation
# for display only: add the means back, flip BGR to RGB and clip to [0, 255].
imagenet_bgr_mean = np.array([103.939, 116.779, 123.68], dtype='float32')

plt.figure(figsize=(15,15))
for i in range(9):
    # 3x3 grid
    plt.subplot(3, 3, i + 1)
    batch = next(train_generator)[0]
    displayable = (batch[0] + imagenet_bgr_mean)[..., ::-1]
    plt.imshow(np.clip(displayable, 0, 255).astype('uint8'))
plt.show()

# Transfer learning Model using ResNet50

In [None]:
# Pretrained convolutional backbone without the ImageNet classification head
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=INPUT_SHAPE)

# Freeze the pretrained layers so only the new head below is trained
for layer in base_model.layers:
    layer.trainable = False

# New classification head on top of the backbone features
x = base_model.output
x = Flatten()(x)
x = Dense(120, activation='sigmoid')(x)
x = Dropout(0.1)(x)

predictions = Dense(num_classes, activation='softmax', kernel_initializer='random_uniform')(x)

model = Model(inputs=base_model.input,
              outputs=predictions)

model.compile(optimizer=Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy', "AUC"])

# Print the summary only after freezing and compiling, so the trainable and
# non-trainable parameter counts describe the model that is actually trained.
model.summary()

In [None]:
# File that receives the best checkpoint written during training
MODEL_ID = 'resnet50.model.bruno.h5'

history = train_model(
    model_id=MODEL_ID,
    model=model,
    train_generator=train_generator,
    val_generator=val_generator,
)

In [None]:
# Loss and accuracy per epoch for the run above
plot_training_curves(history=history)

In [None]:
# Final scores on the held-out test split
print_metrics(model=model, dataset_generator=test_generator)

In [None]:
# Per-class breakdown on the held-out test split
plot_confusion_matrix_and_classification_report(
    model=model,
    dataset_generator=test_generator,
    classes=classes,
)

# Resultados com ResNet50
```
Test loss: 0.1234726831316948
Test accuracy: 0.9866666793823242
Test ROC AUC: 0.9999072551727295
```
```
Classification Report
                        precision    recall  f1-score   support

       RED HEADED DUCK       1.00      1.00      1.00         5
            PINK ROBIN       1.00      1.00      1.00         5
   RED FACED CORMORANT       1.00      1.00      1.00         5
       BARRED PUFFBIRD       0.83      1.00      0.91         5
              WHIMBREL       1.00      1.00      1.00         5
          WALL CREAPER       1.00      1.00      1.00         5
      EVENING GROSBEAK       1.00      1.00      1.00         5
         MOURNING DOVE       1.00      1.00      1.00         5
     NORTHERN CARDINAL       1.00      1.00      1.00         5
 RED BEARDED BEE EATER       1.00      1.00      1.00         5
       RUDY KINGFISHER       1.00      1.00      1.00         5
  NORTHERN MOCKINGBIRD       1.00      1.00      1.00         5
               QUETZAL       1.00      1.00      1.00         5
BLACK THROATED WARBLER       1.00      1.00      1.00         5
  BLACKBURNIAM WARBLER       1.00      0.80      0.89         5
                PUFFIN       1.00      1.00      1.00         5
      TURQUOISE MOTMOT       1.00      1.00      1.00         5
      NORTHERN FLICKER       1.00      1.00      1.00         5
      EASTERN BLUEBIRD       1.00      1.00      1.00         5
       SCARLET TANAGER       1.00      1.00      1.00         5
         CEDAR WAXWING       1.00      1.00      1.00         5
                CANARY       0.83      1.00      0.91         5
 AFRICAN CROWNED CRANE       1.00      1.00      1.00         5
          MAGPIE GOOSE       1.00      1.00      1.00         5
   VERMILION FLYCATHER       1.00      1.00      1.00         5
                 ROBIN       1.00      1.00      1.00         5
       COMMON POORWILL       1.00      1.00      1.00         5
      BANDED BROADBILL       1.00      1.00      1.00         5
        HAWAIIAN GOOSE       1.00      1.00      1.00         5
  RED WINGED BLACKBIRD       1.00      0.80      0.89         5

              accuracy                           0.99       150
             macro avg       0.99      0.99      0.99       150
          weighted avg       0.99      0.99      0.99       150
```