In [None]:
# Root folder of the image dataset (the manually modified copy).
# The cells below read one sub-folder per class from this path.
dataset_dir = "../input/training2/training2"

In [None]:
import tensorflow as tf
import numpy as np
import os
import random
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix
from PIL import Image
from tensorflow.keras.layers import BatchNormalization

# Short aliases for the Keras namespaces used in the cells below
tfk = tf.keras
tfkl = tf.keras.layers

In [None]:
# Seed every random number generator the notebook relies on (reproducibility)
seed = 42
os.environ['PYTHONHASHSEED'] = str(seed)
# Python's RNG, NumPy's RNG and both TensorFlow seeding entry points
for set_seed in (random.seed, np.random.seed, tf.random.set_seed, tf.compat.v1.set_random_seed):
    set_seed(seed)

In [None]:
# Plot one example image per class in a two-column grid
labels = ['Apple','Blueberry','Cherry','Corn','Grape','Orange','Peach','Pepper','Potato','Raspberry','Soybean','Squash','Strawberry','Tomato']
num_col = 2
# Ceiling division so that an odd number of classes still fits in the grid
num_row = -(-len(labels) // num_col)
# squeeze=False keeps `axes` two-dimensional even when there is a single row
fig, axes = plt.subplots(num_row, num_col, figsize=(2*num_row,15*num_col), squeeze=False)
flat_axes = axes.ravel()  # row-major order: index i -> axes[i//num_col, i%num_col]
for ax, label in zip(flat_axes, labels):
    class_dir = os.path.join(dataset_dir, label)
    # os.walk yields file names in arbitrary order; sorting makes the
    # displayed example the same on every run
    class_imgs = sorted(next(os.walk(class_dir))[2])
    # The context manager closes the image file once its pixels are copied
    with Image.open(os.path.join(class_dir, class_imgs[0])) as img:
        ax.imshow(np.array(img))
    ax.set_title('{}'.format(label))
# Hide grid cells that did not receive an image
for ax in flat_axes[len(labels):]:
    ax.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# ImageDataGenerator reads the images from the dataset directory

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.xception import preprocess_input

# Generator without augmentation; 20% of the images are set aside for validation
train_data_gen = ImageDataGenerator(validation_split=0.2)

# Arguments shared by the training and validation iterators
_common_flow_args = dict(directory=dataset_dir,
                         target_size=(256,256),
                         color_mode='rgb',
                         classes=None,
                         class_mode='categorical',
                         batch_size=64,
                         seed=seed)

# Training subset, shuffled
train_gen = train_data_gen.flow_from_directory(shuffle=True,
                                               subset='training',
                                               **_common_flow_args)
# Validation subset, not shuffled
X_val_ft = train_data_gen.flow_from_directory(shuffle=False,
                                              subset='validation',
                                              **_common_flow_args)

In [None]:
# DATA AUGMENTATION

# Random transformations applied to the training images
augmentation_options = {
    'rotation_range': 30,
    'height_shift_range': 50,
    'width_shift_range': 50,
    'zoom_range': 0.3,
    'horizontal_flip': True,
    'vertical_flip': True,
    'fill_mode': 'reflect',
}
# Same 20% validation split as the non-augmented generator
aug_train_data_gen = ImageDataGenerator(validation_split=0.2, **augmentation_options)

# Augmented training iterator (training subset only, shuffled)
X_train_ft = aug_train_data_gen.flow_from_directory(
    directory=dataset_dir,
    target_size=(256,256),
    color_mode='rgb',
    classes=None,
    class_mode='categorical',
    batch_size=64,
    shuffle=True,
    seed=seed,
    subset='training',
)

In [None]:
# TRANSFER LEARNING AND FINE TUNING WITH XCEPTION

# Load the Xception model as convolutional base: ImageNet weights, no
# classification head (include_top=False), and a 256x256 RGB input that
# matches the target_size used by the data generators.
supernet = tfk.applications.Xception(
    include_top=False,
    weights="imagenet",
    input_shape=(256,256,3)
)
# Print the structure of the base network
supernet.summary()

In [None]:
# Mark every Xception layer as trainable to start with; a later cell
# freezes a subset of them again before the final compile.
supernet.trainable = True

inputs = tfk.Input(shape=(256,256,3))
# Apply the input preprocessing that belongs to the Xception model
x = preprocess_input(inputs)

x = supernet(x)
x = tfkl.GlobalAveragePooling2D(name='GlobalPooling')(x)

# Classifier head: three Dropout -> Dense(relu) stages of decreasing width.
# Layers are created in the same order as if they were written out one by one.
for drop_rate, units in ((0.4, 256), (0.3, 128), (0.2, 64)):
    x = tfkl.Dropout(drop_rate, seed=seed)(x)
    x = tfkl.Dense(units, activation='relu', kernel_initializer = tfk.initializers.GlorotUniform(seed))(x)

# Softmax output with 14 units (one per class)
outputs = tfkl.Dense(14, activation='softmax', kernel_initializer = tfk.initializers.GlorotUniform(seed))(x)

tl_model = tfk.Model(inputs=inputs, outputs=outputs, name='model')

# Using Adam optimizer with learning rate 1e-5.
# `metrics` is given as a list, the form documented for Model.compile;
# a bare string is not accepted by every Keras version.
tl_model.compile(loss=tfk.losses.CategoricalCrossentropy(), optimizer=tfk.optimizers.Adam(learning_rate=1e-5), metrics=['accuracy'])
tl_model.summary()

In [None]:
# Utility function to create folders and callbacks for training
from datetime import datetime

def create_folders_and_callbacks(model_name):
  """Create the folders for one training run and return its callbacks.

  A folder ``data_augmentation_experiments/<model_name>_<timestamp>`` is
  created in the current working directory, with a ``ckpts`` and a
  ``tb_logs`` sub-folder.

  Args:
    model_name: Prefix of the experiment folder name.

  Returns:
    A list holding, in this order, a ModelCheckpoint, a TensorBoard and an
    EarlyStopping callback.
  """
  exps_dir = 'data_augmentation_experiments'
  now = datetime.now().strftime('%b%d_%H-%M-%S')
  exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))

  callbacks = []

  # Model checkpoint
  # ----------------
  # exist_ok=True creates any missing parent folder and does not fail when
  # the folder is already there, so no separate existence check is needed.
  ckpt_dir = os.path.join(exp_dir, 'ckpts')
  os.makedirs(ckpt_dir, exist_ok=True)

  # The file path is fixed and save_best_only is False, so every save
  # replaces the previous checkpoint.
  ckpt_callback = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(ckpt_dir, 'cp.ckpt'),
                                                     save_weights_only=False, # True to save only weights
                                                     save_best_only=False) # True to save only the best epoch
  callbacks.append(ckpt_callback)

  # Visualize Learning on Tensorboard
  # ---------------------------------
  tb_dir = os.path.join(exp_dir, 'tb_logs')
  os.makedirs(tb_dir, exist_ok=True)

  # By default shows losses and metrics for both training and validation
  tb_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_dir,
                                               profile_batch=0,
                                               histogram_freq=1)  # if > 0 (epochs) shows weights histograms
  callbacks.append(tb_callback)

  # Early Stopping
  # --------------
  # Stop after 10 epochs without val_loss improvement and roll back to the best weights
  es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
  callbacks.append(es_callback)

  return callbacks

In [None]:
# Build the callbacks (checkpoint, TensorBoard, early stopping) for this run;
# 'TransandTune' becomes the prefix of the experiment folder name.
aug_callbacks = create_folders_and_callbacks(model_name='TransandTune')

In [None]:
# Freeze some layers, train the remaining ones

# Xception layers 0-53 and 66-119 are frozen; all other layers stay trainable
xception_layers = tl_model.get_layer('xception').layers
for frozen_layer in xception_layers[:54] + xception_layers[66:120]:
    frozen_layer.trainable = False
tl_model.summary()

In [None]:
# Compile again after the trainable flags were changed in the previous cell.
# `metrics` is given as a list, the form documented for Model.compile.
tl_model.compile(loss=tfk.losses.CategoricalCrossentropy(), optimizer=tfk.optimizers.Adam(1e-5), metrics=['accuracy'])

In [None]:
# Final training of the model: transfer learning and fine tuning
# The data generators already yield batches (batch_size=64 was set when they
# were created), so batch_size is not passed to fit() as well.
# Only the History.history dict (per-epoch loss/metric lists) is kept.

tl_history = tl_model.fit(
    x = X_train_ft,
    epochs = 200,
    validation_data = X_val_ft,
    callbacks = aug_callbacks,
).history

In [None]:
# Plot the loss and accuracy of the training and validation

# One figure per quantity; the loss is titled after the loss the model was
# compiled with (categorical crossentropy).
for history_key, plot_title in (('loss', 'Categorical Crossentropy'), ('accuracy', 'Accuracy')):
    plt.figure(figsize=(15,5))
    plt.plot(tl_history[history_key], label='Training', alpha=.8, color='#ff7f0e')
    plt.plot(tl_history['val_' + history_key], label='Validation', alpha=.8, color='#4D61E2')
    plt.legend(loc='upper left')
    plt.title(plot_title)
    plt.xlabel('Epoch')
    plt.ylabel(plot_title)
    plt.grid(alpha=.3)

plt.show()

In [None]:
# Save the trained model under the path 'Xception'
tl_model.save('Xception') 