<a href="https://colab.research.google.com/github/giuliovv/ANNDL_competition_1/blob/master/giulio_models/xception_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Unzip and import

In [None]:
import os

# Colab form field: choose "False" when the notebook is not running on Google Colab.
colab = "True" #@param ['True','False']
if colab == "True":
  # Mount Google Drive and make the notebook folder on it the working directory,
  # so every relative path below ('training', 'test', saved models) lives on Drive.
  from google.colab import drive
  drive.mount('/gdrive')
  %cd /gdrive/MyDrive/Colab Notebooks
  # Extract the archive only when no `training` folder exists yet
  # (presumably dataset.zip unpacks into `training/` - TODO confirm).
  if not os.path.isdir('training'):
    !unzip dataset.zip

In [None]:
import numpy as np
import tensorflow as tf

from PIL import Image
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

In [None]:
# Class names; the list order is the class order passed to the data generators
# (`classes=labels`), and its length is the number of network outputs.
labels = [
  'Apple',
  'Blueberry',
  'Cherry',
  'Corn',
  'Grape',
  'Orange',
  'Peach',
  'Pepper',
  'Potato',
  'Raspberry',
  'Soybean',
  'Squash',
  'Strawberry',
  'Tomato',
]

In [None]:
# One-hot encode the class indices 0 .. len(labels)-1 and display the result.
class_indices = range(len(labels))
y = tf.keras.utils.to_categorical(class_indices)
y

### Split data in train and test
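Build a held-out test folder by moving 5% of the images of each class out of `training/`. We also tried using the plant_village dataset as additional test data, but without good results.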

In [None]:
import shutil

get_plants = False

# Fraction of every class that is moved out of `training/` into `test/`.
TEST_FRACTION = 0.05
# Fixed seed so that rebuilding the split from a fresh copy of the dataset
# selects the same files.
SPLIT_SEED = 42

if not os.path.isdir("test"):
  # Build the test dataset by moving a random sample of each class from
  # `training/<label>/` to `test/<label>/`. This runs only while no `test`
  # folder exists, so re-running the cell does not move files a second time.
  print("No test directory!")
  base_dir = os.path.abspath(os.getcwd())
  for label in labels:
    print(label)
    train_dir = os.path.join(base_dir, 'training', label)
    test_dir = os.path.join(base_dir, 'test', label)
    # Sorted so that the seeded split does not depend on the listing order.
    image_names = sorted(os.listdir(train_dir))
    _, test_images = train_test_split(
      image_names, test_size=TEST_FRACTION, random_state=SPLIT_SEED)
    try:
      # exist_ok replaces the old `'test/'+label in os.listdir()` check, which
      # could never be true because os.listdir() returns bare entry names.
      os.makedirs(test_dir, exist_ok=True)
    except OSError:
      print("Failed to create a dir for ", '/test/'+label+'/')
      # Moving files into a missing folder cannot succeed; stop with the real error.
      raise
    else:
      print("Success creating folder ", label)
    for name in test_images:
      shutil.move(os.path.join(train_dir, name), os.path.join(test_dir, name))
  print("Transfered all testing data!")

if get_plants:
  # Get plant_village dataset
  ! pip install -q tfds-nightly
  import tensorflow_datasets as tfds
  from PIL import Image
  ds = tfds.load('plant_village', split='train')
  builder = tfds.builder('plant_village')
  info = builder.info
  labels_tf = info.features["label"].names
  if not 'test_expanded/' in os.listdir():
      try:
        os.makedirs(os.path.abspath(os.getcwd())+'/test_expanded/')
      except OSError:
        print("Failed to create a dir for /test_expanded/")
      else:
        print("Success creating folder test_expanded")
  image_number = 54303
  ds = ds.take(54303)
  for el in ds:
    label_new_ds = labels_tf[el["label"].numpy()]
    for just_fruit_name in labels:
      if just_fruit_name in label_new_ds:
        if not just_fruit_name in os.listdir("test_expanded"):
          try:
            os.makedirs(os.path.abspath(os.getcwd())+'/test_expanded/'+just_fruit_name+'/')
          except OSError:
            print("Failed to create a dir for ", '/test_expanded/'+just_fruit_name+'/')
          else:
            print("Success creating folder ", just_fruit_name)
        im = Image.fromarray(el["image"].numpy())
        im.save(os.path.abspath(os.getcwd())+'/test_expanded/'+just_fruit_name+"/"+str(image_number)+".jpeg")
        break
    if image_number % 1000 == 0:
      print(image_number)
    image_number += 1


### Preprocessing and augmentations

In [None]:
import random

def preproc(img):
  '''Add random Gaussian noise to an image, then apply the Xception preprocessing.

  The noise standard deviation is drawn anew on every call as
  VARIABILITY * random.random(), so images receive anything from no noise up
  to (just under) VARIABILITY.

  Args:
    img: numpy array holding one image. The clipping bounds assume pixel
      values on the 0-255 scale (TODO confirm against the caller).

  Returns:
    The noisy image, clipped to [0, 255] and passed through
    tf.keras.applications.xception.preprocess_input. The input array is left
    unmodified.
  '''
  VARIABILITY = 50
  deviation = VARIABILITY*random.random()
  noise = np.random.normal(0, deviation, img.shape)
  # np.clip returns the clipped array instead of clipping in place, so its
  # result has to be kept. Building a new array (rather than `img += noise`)
  # also avoids modifying the caller's image and works for integer inputs.
  noisy = np.clip(img + noise, 0., 255.).astype(np.float32)
  # Same preprocessing function as the data generators of this notebook use.
  return tf.keras.applications.xception.preprocess_input(noisy)

In [None]:
BATCH_SIZE = 32
VALIDATE_BATCH_SIZE = BATCH_SIZE
# Fraction of `training/` reserved for validation. Both generators below must
# use the same value: the 'training' and 'validation' subsets are derived from
# it, so a mismatch would make them overlap.
VALIDATION_SPLIT = 0.05
IMAGE_SIZE = (256, 256)

# Heavy augmentation, used for the training subset only.
train_generator = ImageDataGenerator(
  rotation_range=90,
  fill_mode='nearest',
  brightness_range=(0.2,1.8),
  channel_shift_range=150,
  shear_range=0.7,
  zoom_range=0.5,
  width_shift_range=0.3,
  height_shift_range=0.3,
  horizontal_flip=True,
  vertical_flip=True,
  validation_split=VALIDATION_SPLIT,
  preprocessing_function=tf.keras.applications.xception.preprocess_input)
# Validation images get the same preprocessing but NO augmentation. Drawing the
# validation subset from `train_generator` would randomly distort it as well,
# which makes `val_loss` (monitored by the callbacks) noisy and pessimistic.
valid_generator = ImageDataGenerator(
  validation_split=VALIDATION_SPLIT,
  preprocessing_function=tf.keras.applications.xception.preprocess_input)
test_generator = ImageDataGenerator(preprocessing_function=tf.keras.applications.xception.preprocess_input)

# Arguments shared by the three directory iterators.
flow_kwargs = dict(
  target_size=IMAGE_SIZE,
  class_mode='categorical',
  classes=labels,
  seed=42)

traingen = train_generator.flow_from_directory('training',
                                               subset='training',
                                               batch_size=BATCH_SIZE,
                                               shuffle=True,
                                               **flow_kwargs)

validgen = valid_generator.flow_from_directory('training',
                                               subset='validation',
                                               batch_size=VALIDATE_BATCH_SIZE,
                                               shuffle=True,
                                               **flow_kwargs)

# batch_size=1 and shuffle=False: every test image is evaluated exactly once,
# in directory order.
testgen = test_generator.flow_from_directory('test',
                                             batch_size=1,
                                             shuffle=False,
                                             **flow_kwargs)

In [None]:
from  sklearn.utils import class_weight
# Per-class loss weights computed by scikit-learn's 'balanced' mode from the
# class index of every training image.
present_classes = np.unique(traingen.classes)
balanced_weights = class_weight.compute_class_weight(
            class_weight='balanced',
            classes=present_classes,
            y=traingen.classes)
# Keras requires a dictionary {class index: weight}. Key it by the actual class
# index rather than by position, so the mapping stays correct even if a class
# has no image in the training subset (np.unique would then skip its index).
class_weights = {int(cls): float(weight) for cls, weight in zip(present_classes, balanced_weights)}

In [None]:
# Number of batches needed to go through each subset once. Step counts must be
# whole numbers, so round up: the final, smaller batch still counts as a step.
n_steps = int(np.ceil(traingen.samples / BATCH_SIZE))
n_val_steps = int(np.ceil(validgen.samples / VALIDATE_BATCH_SIZE))

We also tried VGG16, Inception, and NASNet.

In [None]:
# Pre-trained Xception network (ImageNet weights) without its classification
# top, for 256x256 RGB inputs.
base_model = Xception(
  include_top=False,
  weights='imagenet',
  input_shape=(256, 256, 3),
)

In [None]:
# Size of the softmax output layer: one unit per entry of `labels`.
n_classes = len(labels)

This network is the last one we tried and also includes Gaussian noise; our best results were obtained with the same model but without Gaussian noise.

In [None]:
# Leaky ReLU (slope 0.01 for negative inputs), used by the dense layers of the head.
lrelu = lambda x: tf.keras.activations.relu(x, alpha=0.01)

# Noise strength expressed on the 0-255 pixel scale.
# NOTE(review): assumes the original value 20 was meant in pixel units - confirm.
NOISE_STDDEV_PIXELS = 20

inputs = tf.keras.Input(shape=(256, 256, 3))
# Keep `inputs` bound to the real Input tensor. Rebinding it to the output of
# GaussianNoise made `Model(inputs=inputs, ...)` start from a non-Input tensor,
# i.e. after the noise layer, so the noise was not part of the model.
# The data generators already apply xception.preprocess_input, which rescales
# pixels to [-1, 1] (x / 127.5 - 1); the stddev is converted to that scale.
noisy_inputs = tf.keras.layers.GaussianNoise(NOISE_STDDEV_PIXELS / 127.5)(inputs)

# training=False keeps the pre-trained network in inference mode when it is
# called inside this model.
x = base_model(noisy_inputs, training=False)

# Classification head. The layer sequence is kept exactly as before.
x = GlobalAveragePooling2D()(x)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = Dense(1024, activation=lrelu, name="first")(x)
x = Dense(512, activation=lrelu, name="second")(x)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dropout(0.8)(x)
x = Dense(512, activation=lrelu, name="third")(x)
predictions = Dense(n_classes, activation='softmax', name="last")(x)

model = Model(inputs=inputs, outputs=predictions)


In [None]:
# First training phase: every layer of the pre-trained base is frozen, so only
# the newly added (randomly initialized) head is updated.
for pretrained_layer in base_model.layers:
    pretrained_layer.trainable = False

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=optimizer,
)

In [None]:
# Callbacks shared by both training phases. All of them watch the validation loss.
early_stop = True

callbacks = []

# Optional: stop when val_loss has not improved for 6 epochs.
if early_stop:
    es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=6)
    callbacks.append(es_callback)

# Multiply the learning rate by 0.2 after 3 epochs without val_loss improvement.
lr_plateau_callback = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss", factor=0.2, patience=3, min_lr=0)

# Write TensorBoard logs (with per-epoch histograms) to ./logs.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="./logs", histogram_freq=1)

callbacks += [lr_plateau_callback, tensorboard_callback]

# Backup/restore callback for the first training phase; it is not part of
# `callbacks` because each phase uses its own backup folder.
backup = tf.keras.callbacks.experimental.BackupAndRestore(
    "xception_noise_top_only_backup"
)

In [None]:
# Load the TensorBoard notebook extension (provides the %tensorboard magic).
%load_ext tensorboard

In [None]:
# Open TensorBoard on the folder the TensorBoard callback writes to.
%tensorboard --logdir "./logs"

In [None]:
# First phase: train the new head on top of the frozen base.
# `batch_size` is intentionally not passed: `traingen` already yields batches
# of BATCH_SIZE images, and Keras does not accept `batch_size` for generator /
# Sequence inputs.
model.fit(
  traingen,
  epochs=12,
  steps_per_epoch=n_steps,
  validation_data=validgen,
  callbacks=callbacks+[backup],
  class_weight=class_weights)

In [None]:
# Save the model as it is after the first (head-only) training phase.
model.save("xception_noise_only_top")

In [None]:
# At this point the top layers are trained and we can start fine-tuning
# convolutional layers of the Xception base. We will freeze the bottom N layers
# and train the remaining top layers.

# Print the index and name of every layer of `base_model` to decide how many
# layers to freeze:
for i, layer in enumerate(base_model.layers):
   print(i, layer.name)

In [None]:
# We tried also unfreezing a different number of layers but the results were worse.
# Index, within `base_model.layers` (the list printed above), of the first
# Xception layer that is fine-tuned; all layers before it stay frozen.
FINE_TUNE_FROM = 50

# `model` contains the whole Xception network as ONE nested layer, so
# `model.layers` has only a handful of entries: slicing it with [:50] froze
# every layer (including the new head) and [50:] was empty. The split has to be
# done on the layers of `base_model` instead.
base_model.trainable = True
for layer in base_model.layers[:FINE_TUNE_FROM]:
  layer.trainable = False
for layer in base_model.layers[FINE_TUNE_FROM:]:
  layer.trainable = True
# The layers of the head keep training during fine-tuning.
for layer in model.layers:
  if layer is not base_model:
    layer.trainable = True

# Changes to `trainable` only take effect after compiling again.
from tensorflow.keras.optimizers import Adam
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# The other callbacks are the same but we need a new backup folder
backup2 = tf.keras.callbacks.experimental.BackupAndRestore(
    "xception_second_part_backup_noise"
)

# Train again fine tuning also some xception layers.
# `batch_size` is not passed: the generator already defines the batch size and
# Keras does not accept the argument for generator / Sequence inputs.
model.fit(
  traingen,
  epochs=15,
  steps_per_epoch=n_steps,
  validation_data=validgen,
  callbacks=callbacks+[backup2],
  class_weight=class_weights)

In [None]:
# Save the model after the fine-tuning phase.
model.save("xception_noise")

In [None]:
# Evaluate on the held-out test folder; returns the loss followed by the
# metrics given at compile time (accuracy).
model.evaluate(testgen)

In [None]:
# Further fitting, very low lr
super_final_fit = False
if super_final_fit:
  model = tf.keras.models.load_model("xception")

  # Unfreeze everything
  for layer in model.layers:
    layer.trainable = True

  # Lr so low
  from tensorflow.keras.optimizers import Adam
  model.compile(optimizer=Adam(learning_rate=2e-5), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
if super_final_fit:
  # `batch_size` is not passed: `traingen` already yields batches and Keras
  # does not accept the argument for generator / Sequence inputs.
  model.fit(
    traingen,
    epochs=13,
    steps_per_epoch=n_steps,
    validation_data=validgen,
    class_weight=class_weights)

In [None]:
# Save the result of the optional final pass (only when that pass was enabled).
if super_final_fit:
  model.save("xception_super_final")