In [1]:
#Tensorflow Import and GPU recognition
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [2]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 8352156677948845396, name: "/device:XLA_CPU:0"
 device_type: "XLA_CPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 8942431669352094109
 physical_device_desc: "device: XLA_CPU device", name: "/device:XLA_GPU:0"
 device_type: "XLA_GPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 14214336657733825449
 physical_device_desc: "device: XLA_GPU device", name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 11150726272
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 6626014761812303726
 physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7"]

In [3]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [4]:
!unzip '/content/drive/My Drive/Progetto Advanced Machine Learning/fruits.zip' -d '/content'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/fruits-360_dataset/fruits-360/Training/Tangelo/r_16_100.jpg  
  inflating: /content/fruits-360_dataset/fruits-360/Training/Tangelo/r_17_100.jpg  
  inflating: /content/fruits-360_dataset/fruits-360/Training/Tangelo/r_181_100.jpg  
  inflating: /content/fruits-360_dataset/fruits-360/Training/Tangelo/r_182_100.jpg  
  inflating: /content/fruits-360_dataset/fruits-360/Training/Tangelo/r_183_100.jpg  
  inflating: /content/fruits-360_dataset/fruits-360/Training/Tangelo/r_184_100.jpg  
  inflating: /content/fruits-360_dataset/fruits-360/Training/Tangelo/r_185_100.jpg  
  inflating: /content/fruits-360_dataset/fruits-360/Training/Tangelo/r_186_100.jpg  
  inflating: /content/fruits-360_dataset/fruits-360/Training/Tangelo/r_187_100.jpg  
  inflating: /content/fruits-360_dataset/fruits-360/Training/Tangelo/r_188_100.jpg  
  inflating: /content/fruits-360_dataset/fruits-360/Training/Tangelo/r_189_100.jpg  
  

In [0]:
#Importing Keras and other useful libs
import os
from os import listdir, makedirs
from os.path import join, exists, expanduser
from glob import glob
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [6]:
#Defining dataset path and saving it for future usage
path = '/content/fruits-360_dataset/fruits-360'
os.listdir(path)

['test-multiple_fruits', 'papers', 'Training', 'LICENSE', 'readme.md', 'Test']

In [7]:
#Number of pictures in Training folder
training_files = glob(os.path.join(path,'Training', '*/*.jpg'))
image_num = len(training_files)
print("Number of Images: ",image_num)

Number of Images:  60498


In [8]:
#Number of pictures in Test Folder
testing_files = glob(os.path.join(path, 'Test', '*/*.jpg'))
img_num = len(testing_files)
print("Number of Images: ", img_num)

Number of Images:  20622


In [9]:
#Print category details
image_count = []
class_names = []
print('{:18s}'.format('Class'), end='')
print('Count:')
print('-'*24)
for folder in os.listdir(os.path.join(path,'Training')):
  folder_count = len(os.listdir(os.path.join(path,'Training',folder)))
  image_count.append(folder_count)
  class_names.append(folder)
  print('{:20s}'.format(folder), end='')
  print(folder_count)
print('-'*24)
print('Number of Classes:', len(class_names))
print('Average number of images per Class: ', np.array(image_count).mean())

Class             Count:
------------------------
Grape Pink          492
Eggplant            468
Nectarine Flat      480
Apple Golden 2      492
Apple Red 3         429
Grape White 3       492
Maracuja            490
Plum 3              900
Kohlrabi            471
Potato Red Washed   453
Potato White        450
Physalis with Husk  492
Tomato 2            672
Ginger Root         99
Pear Abate          490
Grapefruit Pink     490
Lemon               492
Potato Sweet        450
Cantaloupe 1        492
Apple Pink Lady     456
Pear Forelle        702
Pear Red            666
Cherry Wax Black    492
Orange              479
Apple Golden 1      492
Apple Golden 3      481
Redcurrant          492
Carambula           490
Tomato 3            738
Apple Red 1         492
Chestnut            450
Kiwi                466
Grape White         490
Cherry 1            492
Dates               490
Apple Crimson Snow  444
Cantaloupe 2        492
Apple Red Yellow 1  492
Nut Pecan           178
Banana Red     

In [10]:
#Definitive paths
train_out_path = os.path.join(path,'Training')
test_out_path = os.path.join(path, 'Test')
print(train_out_path)
print(test_out_path)

/content/fruits-360_dataset/fruits-360/Training
/content/fruits-360_dataset/fruits-360/Test


In [0]:
#Implementing some augmentation to avoid overfitting on the training generator
train_datagenerator = ImageDataGenerator(rotation_range=30, zoom_range=0.2, horizontal_flip=True, validation_split=0.2, data_format='channels_last')
train_and_val_generator = ImageDataGenerator(rotation_range=30, zoom_range=0.2, horizontal_flip=True, data_format='channels_last')
test_datagenerator = ImageDataGenerator(data_format='channels_last')

In [12]:
#Creating Batches
image_size = (256, 256)
train_batches = train_datagenerator.flow_from_directory(train_out_path, target_size=image_size, color_mode="rgb", class_mode="categorical" ,  batch_size=32, subset='training', seed = 20052020)
val_batches = train_datagenerator.flow_from_directory(directory=train_out_path, target_size=image_size, color_mode="rgb", class_mode="categorical" ,  batch_size=32, subset='validation', seed = 20052020)
train_val_batches = train_and_val_generator.flow_from_directory(directory=train_out_path, target_size=image_size, color_mode="rgb", class_mode="categorical" ,  batch_size=32, seed = 20052020)
test_batches = test_datagenerator.flow_from_directory(directory=test_out_path, target_size=image_size, color_mode="rgb", class_mode="categorical" ,  batch_size=32, shuffle=False)

Found 48431 images belonging to 120 classes.
Found 12067 images belonging to 120 classes.
Found 60498 images belonging to 120 classes.
Found 20622 images belonging to 120 classes.


In [0]:
#2 Approach: Inception v3 pre-trained
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2

In [14]:
inception_v3_model = InceptionV3(include_top=False, weights='imagenet')
#Using pretrained
for layer in inception_v3_model.layers:
  layer.trainable=False
# add preprocessing layer
inputs = Input(shape=image_size+(3,))
x = preprocess_input(inputs, data_format='channels_last')
x = inception_v3_model(x)
#Adding Layers at the top
x = GlobalAveragePooling2D(data_format='channels_last', name='avg_pool')(x)
#x = Dropout(0.8)(x)
#Adding Classification layer with softmax activation function, (needed for categorical predictions)
predictor = Dense(120 ,activation='softmax', name='predictor', kernel_regularizer=l2(0.1))(x)
inception_v3_model = Model(inputs, outputs=predictor)

# just to be sure
for layer in inception_v3_model.layers[:-1]:
  layer.trainable = False

inception_v3_model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
inception_v3_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 256, 256, 3)]     0         
_________________________________________________________________
tf_op_layer_RealDiv (TensorF [(None, 256, 256, 3)]     0         
_________________________________________________________________
tf_op_layer_Sub (TensorFlowO [(None, 256, 256, 3)]     0         
_________________________________________________________________
inception_v3 (Model)         multiple                  21802784  
_________________________________________________________________
avg_pool (GlobalAveragePooli (None, 2048)              0         
_________________________________________________________________
predictor (Dense)            (None, 120)

In [0]:
inception_v3_model.save_weights('Initial Weights.hdf5')

In [16]:
import pickle
!pip install scikit-optimize
import skopt
from skopt.utils import use_named_args
from skopt.callbacks import CheckpointSaver
from collections import namedtuple

Collecting scikit-optimize
[?25l  Downloading https://files.pythonhosted.org/packages/5c/87/310b52debfbc0cb79764e5770fa3f5c18f6f0754809ea9e2fc185e1b67d3/scikit_optimize-0.7.4-py2.py3-none-any.whl (80kB)
[K     |████                            | 10kB 17.6MB/s eta 0:00:01[K     |████████▏                       | 20kB 2.2MB/s eta 0:00:01[K     |████████████▎                   | 30kB 2.8MB/s eta 0:00:01[K     |████████████████▎               | 40kB 3.1MB/s eta 0:00:01[K     |████████████████████▍           | 51kB 2.5MB/s eta 0:00:01[K     |████████████████████████▌       | 61kB 2.7MB/s eta 0:00:01[K     |████████████████████████████▌   | 71kB 3.1MB/s eta 0:00:01[K     |████████████████████████████████| 81kB 2.6MB/s 
Collecting pyaml>=16.9
  Downloading https://files.pythonhosted.org/packages/15/c4/1310a054d33abc318426a956e7d6df0df76a6ddfa9c66f6310274fb75d42/pyaml-20.4.0-py2.py3-none-any.whl
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-

In [17]:
#set up saving. If old data are present, they are loaded
Hyperpars = namedtuple('Hyperpars', ['learning_rate', 'alpha', 'dropout'])
HPO_save_path = '/content/drive/My Drive/Progetto Advanced Machine Learning/Model Checkpoints/'
dict_save_path = HPO_save_path + 'Inception HPO History Dictionary.pickle'
checkpoint_save_path = HPO_save_path + 'Inception HPO Checkpoint.pickle'
if os.path.isfile(dict_save_path):
  with open(dict_save_path, 'rb') as f:
    print('Loading previous history...')
    history = pickle.load(f)
else:
  print('No previous history found')
  history = dict()
if os.path.isfile(checkpoint_save_path):
  with open(checkpoint_save_path, 'rb') as f:
    print('Loading previous results...')
    results = skopt.load(checkpoint_save_path)
else:
  print('No previous results found')
checkpoint_saver = CheckpointSaver(checkpoint_save_path)

Loading previous history...
No previous results found


In [0]:
#set random seeds to get reproducible results
np.random.seed(1)
tf.random.set_seed(2)

def create_model(learning_rate, alpha, dropout):
  inception_v3_model = InceptionV3(include_top=False, weights='imagenet')
  #Using pretrained
  for layer in inception_v3_model.layers:
    layer.trainable=False
  # add preprocessing layer
  inputs = Input(shape=image_size+(3,))
  x = preprocess_input(inputs, data_format='channels_last')
  x = inception_v3_model(x)
  #Adding Layers at the top
  x = GlobalAveragePooling2D(data_format='channels_last', name='avg_pool')(x)
  x = Dropout(dropout)(x)
  #Adding Classification layer with softmax activation function, (needed for categorical predictions)
  predictor = Dense(120 ,activation='softmax', name='predictor', kernel_regularizer=l2(alpha))(x)
  model = Model(inputs, outputs=predictor)

  # just to be sure
  for layer in model.layers[:-1]:
    layer.trainable = False
  model.load_weights('Initial Weights.hdf5')

  model.compile(optimizer=Adam(learning_rate), loss='categorical_crossentropy', metrics=['accuracy']) 
  return model

#declare the search space
SEARCH_SPACE = [skopt.space.Real(0.0001, 0.1, name='learning_rate', prior='log-uniform'),
                skopt.space.Real(1e-9, 1, name='alpha', prior='log-uniform'),
                skopt.space.Real(0.0001, 0.95, name='dropout', prior='log-uniform')]

@use_named_args(SEARCH_SPACE)
def fitness(learning_rate, alpha, dropout):
  print('ε: {}, ɑ {}, dropout {}'.format(learning_rate, alpha, dropout))
  hyperpars = Hyperpars(learning_rate, alpha, dropout)
  print(len(history))
  if hyperpars in history:
    print('REPEATING POINTS!!!')
  model = create_model(learning_rate, alpha, dropout)
  #compile generators
  train_batches = train_datagenerator.flow_from_directory(train_out_path, target_size=image_size, color_mode="rgb", class_mode="categorical" ,  batch_size=32, subset='training', seed = 20052020)
  val_batches = train_datagenerator.flow_from_directory(directory=train_out_path, target_size=image_size, color_mode="rgb", class_mode="categorical" ,  batch_size=32, subset='validation', shuffle=False, seed = 20052020)
  #train
  early_stopping = EarlyStopping(monitor='val_loss', patience=0, restore_best_weights=True)
  training_results = model.fit(train_batches, epochs=2, verbose=1, shuffle=True, validation_data=val_batches, workers=2)
  history[hyperpars] = training_results.history
  with open(dict_save_path, 'wb') as f:
    pickle.dump(history, f)
  return training_results.history['val_accuracy'][-1]

In [0]:
#if we have old results, do a warm start. If not, cold start
try:
  known_xs = results.x_iters
  known_ys = results.func_vals
  result = skopt.forest_minimize(fitness, SEARCH_SPACE, n_calls=10, callback=checkpoint_saver, x0=known_xs, y0=known_ys)
except NameError:
  result = skopt.forest_minimize(fitness, SEARCH_SPACE, n_calls=10, callback=checkpoint_saver)

ε: 0.0980741228990046, ɑ 0.24718165404756284, dropout 0.00032332824201601514
1
REPEATING POINTS!!!
Found 48431 images belonging to 120 classes.
Found 12067 images belonging to 120 classes.
Epoch 1/2
Epoch 2/2
ε: 0.00014663847974417593, ɑ 0.0016713971864237532, dropout 0.00046439598717244013
1
Found 48431 images belonging to 120 classes.
Found 12067 images belonging to 120 classes.
Epoch 1/2
Epoch 2/2
ε: 0.007620242345824476, ɑ 0.0013583509327813378, dropout 0.0008311065412865829
2
Found 48431 images belonging to 120 classes.
Found 12067 images belonging to 120 classes.
Epoch 1/2
Epoch 2/2
ε: 0.006097666538935929, ɑ 6.006172096720402e-09, dropout 0.00010037070129684666
3
Found 48431 images belonging to 120 classes.
Found 12067 images belonging to 120 classes.
Epoch 1/2
Epoch 2/2
ε: 0.01940569588645719, ɑ 0.7308819321511685, dropout 0.0009160051636967413
4
Found 48431 images belonging to 120 classes.
Found 12067 images belonging to 120 classes.
Epoch 1/2
Epoch 2/2
ε: 0.010080137874289232