In [0]:
# !pip install keras
# !pip install kaggle
# !pip install kaggle-cli

In [0]:
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils import shuffle
from sklearn.model_selection import StratifiedShuffleSplit
import matplotlib.pyplot as plt
import pandas as pd
import urllib.request
import os, tarfile, zipfile
from glob import glob
import distutils
from distutils import dir_util

import keras
from keras import backend as K
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Reshape, Activation
from keras.layers import Conv2D, MaxPool2D, BatchNormalization
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input


import tensorflow as tf
print(tf.test.gpu_device_name())

%matplotlib inline

In [0]:
# Dropbox link to the Fruits-360 dataset archive (zip file name dated 2018_02_08);
# passed to get_fruit360_data below as the download URL.
fruit360_URL = 'https://www.dropbox.com/s/ui10iyp9kwawlf3/fruits-360_dataset_2018_02_08.zip?dl=1'

# Utility functions

In [0]:
def fetch_data(URL, DOWNLOAD_FOLDER, DOWNLOAD_FILE):
    """Download ``URL`` to ``DOWNLOAD_FOLDER + DOWNLOAD_FILE`` unless it already exists.

    Args:
        URL: Address handed to ``urllib.request.urlretrieve``.
        DOWNLOAD_FOLDER: Directory that receives the file; created if missing.
            The target path is built by plain string concatenation, so this
            must end with a path separator.
        DOWNLOAD_FILE: File name appended to ``DOWNLOAD_FOLDER``.

    Raises:
        Whatever ``urlretrieve`` or the file-system calls raise. When the
        download fails, no file is left at the target path.
    """
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

    # Concatenation (not os.path.join) is kept deliberately so the path matches
    # the one other code in this file builds from the same two arguments.
    target = DOWNLOAD_FOLDER+DOWNLOAD_FILE
    if os.path.isfile(target):
        return

    # Download next to the target and rename only on success. Otherwise an
    # interrupted transfer would leave a truncated file that the isfile()
    # check above would treat as a finished download on the next run.
    partial = target + '.part'
    print('Beginning file download...')
    try:
        urllib.request.urlretrieve(URL, partial)
        os.replace(partial, target)
    except BaseException:
        # BaseException so that a kernel interrupt also cleans up.
        if os.path.exists(partial):
            os.remove(partial)
        raise
    print('Done.')
  

In [0]:
def split_train_test(XY, n_splits=1, test_size=0.2, random_state=42):
    """Stratified shuffle split of a (features, labels) pair.

    Args:
        XY: Indexable whose element 0 holds the features (indexed as
            ``features[idx, :]``) and element 1 the labels (indexed as
            ``labels[idx]``).
        n_splits, test_size, random_state: Forwarded to
            ``StratifiedShuffleSplit``.

    Returns:
        ``(X_train, Y_train, X_test, Y_test)``. Every iteration overwrites the
        previous one, so with ``n_splits > 1`` only the last split is returned.
    """
    splitter = StratifiedShuffleSplit(
        n_splits=n_splits,
        test_size=test_size,
        random_state=random_state,
    )
    features, labels = XY[0], XY[1]

    for train_idx, test_idx in splitter.split(features, labels):
        X_train, Y_train = features[train_idx, :], labels[train_idx]
        X_test, Y_test = features[test_idx, :], labels[test_idx]

    return X_train, Y_train, X_test, Y_test

In [0]:
def mkdir(p):
    """Create directory ``p`` with any missing parents; do nothing if the path exists."""
    already_there = os.path.exists(p)
    if already_there:
        return
    os.makedirs(p)

def copy_dirs(src, dst):
    """Recursively copy directory ``src`` into ``dst``, merging into an existing ``dst``.

    Uses ``shutil.copytree`` rather than ``distutils.dir_util.copy_tree``:
    ``distutils`` is deprecated and absent from Python 3.12+, and ``copy_tree``
    remembers which directories it has created, so re-copying into a
    destination that was deleted during the same kernel session fails.

    Args:
        src: Existing directory to copy from.
        dst: Destination directory; created (with parents) when missing,
            files with the same name are overwritten.
    """
    # Function-scope import: shutil is not in the notebook's import cell.
    import shutil

    shutil.copytree(src, dst, dirs_exist_ok=True)

def link(src, dest):
    """Create a directory symlink ``dest`` pointing at ``src`` unless ``dest`` already exists.

    Args:
        src: Path the link should point to.
        dest: Path of the symlink to create.
    """
    # lexists() also reports dangling symlinks, which exists() treats as
    # missing; without it a second call would raise FileExistsError.
    if not os.path.lexists(dest):
        # The original called the non-existent ``os.c``; ``target_is_directory``
        # is a keyword of ``os.symlink``.
        os.symlink(src, dest, target_is_directory=True)

def make_small_dataset(train_path, valid_path, classes):
    """Copy the selected class folders from a full dataset into a smaller one.

    Args:
        train_path: Two-element sequence ``[source_dir, destination_dir]`` for
            the training images.
        valid_path: Two-element sequence ``[source_dir, destination_dir]`` for
            the validation images.
        classes: Names of the class sub-folders to copy.

    A class folder that already exists at the destination is left untouched.
    """
    train_src, valid_src = os.path.abspath(train_path[0]), os.path.abspath(valid_path[0])
    train_dst, valid_dst = os.path.abspath(train_path[1]), os.path.abspath(valid_path[1])

    for target_root in (train_dst, valid_dst):
        mkdir(target_root)

    # Per class: training folder first, then validation folder.
    for class_name in classes:
        for src_root, dst_root in ((train_src, train_dst), (valid_src, valid_dst)):
            class_dst = dst_root + '/' + class_name
            if os.path.exists(class_dst):
                continue
            copy_dirs(src_root + '/' + class_name, class_dst)

In [0]:
def get_fruit360_data(url, zip_folder, zip_file, extract_loc):
    """Download the dataset archive (if not already present) and extract it.

    Args:
        url: Download address of the zip archive.
        zip_folder: Directory holding the archive; concatenated directly with
            ``zip_file``, so it must end with a path separator.
        zip_file: File name of the archive.
        extract_loc: Directory the archive is extracted into; created if missing.
    """
    fetch_data(url, zip_folder, zip_file)
    mkdir(extract_loc)
    # The context manager closes the archive even when extraction raises;
    # the explicit close() it replaces was skipped in that case.
    with zipfile.ZipFile(zip_folder+zip_file, 'r') as archive:
        archive.extractall(extract_loc)
    

In [0]:
def one_hot_encoder(label):
    """One-hot encode an array of labels and print the distinct label values.

    Args:
        label: Array exposing ``reshape``; it is turned into a single column
            before encoding.

    Returns:
        The result of ``OneHotEncoder(dtype=np.float32).fit_transform`` on the
        column (NOTE(review): presumably a sparse matrix under scikit-learn's
        default settings - confirm against the installed version).
    """
    as_column = label.reshape(-1, 1)
    label_1hot = OneHotEncoder(dtype=np.float32).fit_transform(as_column)
    distinct = np.unique(label)
    print('The labels are: {}'.format(distinct))
    return label_1hot

# Load data

In [0]:
# Base directory for the downloaded archive and the extracted datasets.
# Keep the trailing '/': the paths in this notebook are built from it by
# plain string concatenation.
root_folder = '/content/drive/app/'
# root_folder = 'D:/dev/data/'

In [0]:
# Fetch the archive into root_folder and unpack it under large_set/.
get_fruit360_data(
    url=fruit360_URL,
    zip_folder=root_folder,
    zip_file='fruits-360_dataset_2018_02_08.zip',
    extract_loc=root_folder+'large_set/',
)

In [0]:
# Fruit classes copied into the reduced dataset.
classes = [
    'Apple Braeburn',
    'Apple Golden 1',
    'Apricot',
    'Banana',
    'Avocado',
]

# Each list is [source directory (full set), destination directory (reduced set)].
train_path = [
    root_folder+'large_set/fruits-360/Training/',
    root_folder+'small_set_1/fruits-360/Training/',
]
valid_path = [
    root_folder+'large_set/fruits-360/Validation/',
    root_folder+'small_set_1/fruits-360/Validation/',
]

make_small_dataset(train_path, valid_path, classes)



In [0]:
!pwd
print(train_path[1])
print(valid_path[1])
!ls -l 'drive/app/small_set/fruits-360/'
!ls -l /content/drive/app/small_set_1/fruits-360/Training/
!ls -l /content/drive/app/small_set_1/fruits-360/Validation/

!ls -l /content/drive/app/small_set_1/fruits-360/Training/Avocado/

# Model definition

In [0]:
#@title Parameters

# Image size fed to the network, batch size, and epochs per fit call.
IMAGE_SIZE = [100, 100]
BATCH_SIZE = 32
EPOCHS = 5

# Index into train_path / valid_path that picks which dataset copy is used.
SELECTED = 0

# One entry per item directly under the training directory; the length of
# train_folders is used as the number of output classes.
train_folders = glob('{}/*'.format(train_path[SELECTED]))
train_images = glob('{}/*/*.jp*g'.format(train_path[SELECTED]))
valid_images = glob('{}/*/*.jp*g'.format(valid_path[SELECTED]))

In [0]:
# Convolutional base: VGG16 with ImageNet weights and without its dense top.
vgg16 = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=IMAGE_SIZE + [3],
)

# Freeze the pretrained layers so that only the new head is trainable.
for layer in vgg16.layers:
    layer.trainable = False

# New head: flatten the feature maps, then one softmax unit per training folder.
out = Flatten()(vgg16.output)
predictions = Dense(units=len(train_folders), activation='softmax')(out)

vgg16_fruit360 = Model(inputs=vgg16.input, outputs=predictions)
vgg16_fruit360.summary()

In [0]:
# Categorical cross-entropy matches the softmax output layer of the model.
vgg16_fruit360.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [0]:
# Optional augmentation pipeline, currently disabled:
# datagen = ImageDataGenerator(rotation_range=20,
#                              width_shift_range=0.1,
#                              height_shift_range=0.1,
#                              shear_range=0.1,
#                              zoom_range=0.2,
#                              horizontal_flip=True,
#                              vertical_flip=True,
#                              preprocessing_function=preprocess_input)

# Only the VGG16 input preprocessing is applied; no augmentation.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Training and validation generators share the same settings.
flow_kwargs = dict(target_size=IMAGE_SIZE, batch_size=BATCH_SIZE, shuffle=True)
train_generator = datagen.flow_from_directory(train_path[SELECTED], **flow_kwargs)
valid_generator = datagen.flow_from_directory(valid_path[SELECTED], **flow_kwargs)

In [0]:
# Train for EPOCHS epochs, validating on valid_generator after each epoch.
vgg16_fruit360.fit_generator(
    train_generator,
    epochs=EPOCHS,
    validation_data=valid_generator,
    use_multiprocessing=True,
    workers=4,
)

In [0]:
# fit_generator does not reset the weights, so this trains for EPOCHS further
# epochs on top of whatever training has already been run on the model.
vgg16_fruit360.fit_generator(
    train_generator,
    epochs=EPOCHS,
    validation_data=valid_generator,
    use_multiprocessing=True,
    workers=4,
)

In [0]:
# Another EPOCHS epochs of training, continuing from the model's current weights.
vgg16_fruit360.fit_generator(
    train_generator,
    epochs=EPOCHS,
    validation_data=valid_generator,
    use_multiprocessing=True,
    workers=4,
)

In [0]:
# The original cell ended in a dangling attribute access, which is a
# SyntaxError. Show the number of validation images found on disk instead.
valid_generator.samples

In [0]:
from sklearn.metrics import confusion_matrix
def build_confusion_matrix(data_generator, data_path=None, model=None, n_samples=None):
    """Run the model over one pass of ``data_generator`` and build a confusion matrix.

    Args:
        data_generator: Iterable yielding ``(x, y)`` batches with one-hot ``y``.
        data_path: Unused; kept so existing calls keep working.
        model: Object with a ``predict`` method. Defaults to the notebook-level
            ``vgg16_fruit360``.
        n_samples: Number of samples after which iteration stops. Defaults to
            ``data_generator.samples`` (or ``.n``). This replaces the previously
            hard-coded 9673, which is only right for one specific dataset; on a
            smaller one an endlessly looping generator was read several times
            over. If the generator exposes neither attribute, it is consumed
            until exhausted, so pass ``n_samples`` for an endless plain generator.

    Returns:
        The array returned by ``sklearn.metrics.confusion_matrix(targets, predictions)``.
    """
    if model is None:
        model = vgg16_fruit360
    if n_samples is None:
        n_samples = getattr(data_generator, 'samples', None)
        if n_samples is None:
            n_samples = getattr(data_generator, 'n', None)

    # Collect per-batch results and concatenate once at the end, instead of
    # re-allocating a growing array on every batch.
    predicted_batches = []
    target_batches = []
    seen = 0
    for batch_no, (x, y) in enumerate(data_generator, start=1):
        if batch_no % 50 == 0:
            print(batch_no)

        predicted_batches.append(np.argmax(model.predict(x), axis=1))
        target_batches.append(np.argmax(y, axis=1))
        seen += len(y)

        if n_samples is not None and seen >= n_samples:
            break

    if target_batches:
        targets = np.concatenate(target_batches)
        predictions = np.concatenate(predicted_batches)
    else:
        # np.concatenate rejects an empty list; pass empty label arrays on.
        targets = np.array([], dtype=int)
        predictions = np.array([], dtype=int)

    return confusion_matrix(targets, predictions)
            

In [0]:
import itertools
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print the confusion matrix and draw it on the current matplotlib figure.

    Args:
        cm: 2-D array, rows are true classes and columns predicted classes.
        classes: Tick labels, in the row/column order of ``cm``.
        normalize: When True, divide each row by its sum before display.
        title: Figure title.
        cmap: Colormap handed to ``plt.imshow``.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Rows without any sample stay 0 instead of becoming NaN, which would
        # also turn cm.max(), and with it the text-colour threshold, into NaN.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text to stay readable.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Called last so that the axis labels are part of the layout computation.
    plt.tight_layout()

In [0]:
# Invert class_indices (class name -> index) into a list indexed by class index.
class_to_index = valid_generator.class_indices
labels = [None] * len(class_to_index)
for class_name, class_index in class_to_index.items():
    labels[class_index] = class_name

print(labels)


In [0]:
# Confusion matrix of the model's predictions on the validation generator.
cm = build_confusion_matrix(data_generator=valid_generator)

In [0]:
# Raw counts (no normalisation), with class names as tick labels.
plot_confusion_matrix(cm=cm, classes=labels)