Generating and augmenting images

In [39]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

datagen = ImageDataGenerator(
        rotation_range=180,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

baseurl = '/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak'
url = baseurl + '/data/train/apple_healthy/Apple-healthy-01251.jpg'

#print(url)

img = load_img(url)  # this is a PIL image
x = img_to_array(img)  # this is a Numpy array with shape (3, 150, 150)
x = x.reshape((1,) + x.shape)  # this is a Numpy array with shape (1, 3, 150, 150)

# the .flow() command below generates batches of randomly transformed images
# and saves the results to the `preview/` directory
i = 0
for batch in datagen.flow(x, batch_size=1,
                          save_to_dir='preview', save_prefix='apple_healthy', save_format='jpeg'):
    i += 1
    if i > 20:
        break  # otherwise the generator would loop indefinitely

Binary classifier

In [5]:
'''
The code snippet below is our first model, a simple stack of 3 convolution layers 
with a ReLU activation and followed by max-pooling layers. This is very similar to 
the architectures that Yann LeCun advocated in the 1990s for image classification 
(with the exception of ReLU).
'''

from keras.preprocessing.image import ImageDataGenerator

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
#from keras.backend import image_data_format
from keras import backend as K

#img_width, img_height = 150, 150
img_width, img_height = 761, 800

'''
if image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)
'''

input_shape = (150, 150, 3)
#input_shape = (3, 150, 150)


model = Sequential()
#model.add(Conv2D(32, (3, 3), input_shape=(3, 150, 150)))
model.add(Conv2D(32, 3, 3, input_shape=input_shape))
model.add(Activation('relu'))
#model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(MaxPooling2D(pool_size=(2, 2)))

#model.add(Conv2D(32, (3, 3)))
model.add(Conv2D(32, 3, 3))
model.add(Activation('relu'))
#model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(MaxPooling2D(pool_size=(2, 2)))

#model.add(Conv2D(64, (3, 3)))
model.add(Conv2D(64, 3, 3))
model.add(Activation('relu'))
#model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(MaxPooling2D(pool_size=(2, 2)))


'''
On top of it we stick two fully-connected layers. We end the model with a single 
unit and a sigmoid activation, which is perfect for a binary classification. To 
go with it we will also use the binary_crossentropy loss to train our model.
'''

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
# model.add(Activation('softmax')) # for multiclass

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# for multiclass
'''
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

'''
'''
Let's prepare our data. We will use .flow_from_directory() to generate batches 
of image data (and their labels) directly from our jpgs in their respective 
folders.
'''
 
batch_size = 16

# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(rescale=1./255)

# this is a generator that will read pictures found in
# subfolders of 'data/train', and indefinitely generate
# batches of augmented image data

basedir = '/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak'
targetdir = basedir + '/data/train'
valdir = basedir + '/data/validation'

train_generator = train_datagen.flow_from_directory(
        targetdir,  # this is the target directory (originally = 'data/train')
        target_size=(150, 150),  # all images will be resized to 150x150
        batch_size=batch_size,
        class_mode='binary')  # since we use binary_crossentropy loss, we need binary labels

# this is a similar generator, for validation data
validation_generator = test_datagen.flow_from_directory(
        valdir, # (originally 'data/validation')
        target_size=(150, 150),
        batch_size=batch_size,
        class_mode='binary')

'''
We can now use these generators to train our model. Each epoch takes 20-30s on 
GPU and 300-400s on CPU. So it's definitely viable to run this model on CPU if 
you aren't in a hurry.
'''

#epochs=1, # original 50, 400s/epoch on cpu (20s/epoch on gpu)
'''
model.fit_generator(
        train_generator,
        steps_per_epoch=2000 // batch_size,
        epochs=50,
        validation_data=validation_generator,
        validation_steps=800 // batch_size)
'''

nb_epoch = 2
nb_train_samples = 320 + 2378
nb_validation_samples = 136 + 1018

model.fit_generator(
        train_generator,
        samples_per_epoch = nb_train_samples,
        nb_epoch = nb_epoch,
        validation_data=validation_generator,
        nb_val_samples = nb_validation_samples)
model.save_weights('first_try.h5')  # always save your weights after training or during training

print('Done!')

Found 2698 images belonging to 2 classes.
Found 1154 images belonging to 2 classes.
Epoch 1/2
Epoch 2/2
Done!


In [1]:
# gets jpg count for all images in given directory and all subdirectories
def jpg_counts(dirpath):
    from os import listdir, walk
    #from os.path import isfile, join
    
    # list of all subdirectories
    dirlist = [x[0] for x in walk(dirpath)][1:]
    
    # list of all images in this directory
    imagelist = [f for f in listdir(dirpath) if '.jpg' in f[-4:].lower()]
    
    # get all images in all subdirectories
    #print(dirlist)
    for currdir in dirlist:
        imagelist += [f for f in listdir(currdir) if '.jpg' in f[-4:].lower()]
    
    return len(imagelist)

Multiclass classifying

In [39]:
'''
The code snippet below is our first model, a simple stack of 3 convolution layers 
with a ReLU activation and followed by max-pooling layers. This is very similar to 
the architectures that Yann LeCun advocated in the 1990s for image classification 
(with the exception of ReLU).
'''

from keras.preprocessing.image import ImageDataGenerator

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
#from keras.backend import image_data_format
from keras import backend as K

from os import walk

#img_width, img_height = 150, 150
img_width, img_height = 761, 800

input_shape = (150, 150, 3)
#input_shape = (3, 150, 150)


model = Sequential()
#model.add(Conv2D(32, (3, 3), input_shape=(3, 150, 150)))
model.add(Conv2D(32, 3, 3, input_shape=input_shape))
model.add(Activation('relu'))
#model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(MaxPooling2D(pool_size=(2, 2)))

#model.add(Conv2D(32, (3, 3)))
model.add(Conv2D(32, 3, 3))
model.add(Activation('relu'))
#model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(MaxPooling2D(pool_size=(2, 2)))

#model.add(Conv2D(64, (3, 3)))
model.add(Conv2D(64, 3, 3))
model.add(Activation('relu'))
#model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(MaxPooling2D(pool_size=(2, 2)))


'''
On top of it we stick two fully-connected layers. We end the model with a single 
unit and a sigmoid activation, which is perfect for a binary classification. To 
go with it we will also use the binary_crossentropy loss to train our model.
'''

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
#model.add(Dense(1))
model.add(Dense(4))
#model.add(Activation('sigmoid'))
model.add(Activation('softmax')) # for multiclass

'''
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
'''

# for multiclass
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])



'''
Let's prepare our data. We will use .flow_from_directory() to generate batches 
of image data (and their labels) directly from our jpgs in their respective 
folders.
'''
 
batch_size = 16

# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(rescale=1./255)

# this is a generator that will read pictures found in
# subfolders of 'data/train', and indefinitely generate
# batches of augmented image data

basedir = '/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak'
targetdir = basedir + '/data/train'
valdir = basedir + '/data/validation'

classes = next(walk(targetdir))[1]

train_generator = train_datagen.flow_from_directory(
        targetdir,  # this is the target directory (originally = 'data/train')
        target_size=(150, 150),  # all images will be resized to 150x150
        batch_size=batch_size,
        class_mode='categorical',
        classes=classes)

# this is a similar generator, for validation data
validation_generator = test_datagen.flow_from_directory(
        valdir, # (originally 'data/validation')
        target_size=(150, 150),
        batch_size=batch_size,
        class_mode='categorical',
        classes=classes)

'''
We can now use these generators to train our model. Each epoch takes 20-30s on 
GPU and 300-400s on CPU. So it's definitely viable to run this model on CPU if 
you aren't in a hurry.
'''

#epochs=1, # original 50, 400s/epoch on cpu (20s/epoch on gpu)
'''
model.fit_generator(
        train_generator,
        steps_per_epoch=2000 // batch_size,
        epochs=50,
        validation_data=validation_generator,
        validation_steps=800 // batch_size)
'''

nb_epoch = 1
nb_train_samples = jpg_counts(targetdir)
nb_validation_samples = jpg_counts(valdir)

model.fit_generator(
        train_generator,
        samples_per_epoch = nb_train_samples,
        nb_epoch = nb_epoch,
        validation_data=validation_generator,
        nb_val_samples = nb_validation_samples)
model.save_weights('first_try.h5')  # always save your weights after training or during training

print('Done!')

Found 4033 images belonging to 4 classes.
Found 1725 images belonging to 4 classes.
Epoch 1/1
Done!


In [8]:
from os import listdir

basedir = '/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak'
#mypath = '/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak/image_csv/'
targetdir = basedir + '/data/train'
currDir = targetdir + '/strawberry_healthy'
imagelist = listdir(currDir)
test_size = int(len(imagelist))
print(test_size)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak/data/train/strawberry_healthy'

In [None]:
# gets jpg count for all images in given directory and all subdirectories
def jpg_counts(dirpath):
    from os import listdir, walk
    #from os.path import isfile, join
    
    # list of all subdirectories
    dirlist = [x[0] for x in walk(dirpath)][1:]
    
    # list of all images in this directory
    imagelist = [f for f in listdir(dirpath) if '.jpg' in f[-4:].lower()]
    
    # get all images in all subdirectories
    #print(dirlist)
    for currdir in dirlist:
        imagelist += [f for f in listdir(currdir) if '.jpg' in f[-4:].lower()]
    
    return len(imagelist)
    
    #filelist = listdir(currDir)

In [35]:
def get_num_classes(dirpath, verbose=False):
    from os import walk
    
    # get list of all direct subdirectories
    dirlist = next(walk(dirpath))[1]
    
    if verbose:
        print('Classes found:')
        for d in dirlist:
            print(d)
    
    return len(dirlist)

In [14]:
basedir = '/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak'
targetdir = basedir + '/data/train'
valdir = basedir + '/data/validation'
from os import walk
listdir = next(walk(targetdir))[1]
print(type(listdir))
listdir

<class 'list'>


['banana_Banana_speckle',
 'banana_Black_sigatoka_(Black_leaf_streak)',
 'banana_healthy']

In [25]:
basedir = '/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak/data3/train'
print('num classes', get_num_classes(basedir))
jpg_counts(basedir)

num classes 53


52283

In [34]:
basedir = '/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak/data3/train'
print('num classes', get_num_classes(basedir))
basedir = '/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak/data3/'
print('num classes', get_num_classes(basedir))

apple_Apple_scab
apple_Black_rot
apple_Cedar_apple_rust
apple_healthy
banana_Banana_speckle
banana_Black_sigatoka_(Black_leaf_streak)
banana_healthy
cabbage_Black_rot
cabbage_healthy
cassava_Brown_leaf_spot
cassava_Cassava_green_spider_mite
cherry_healthy
cherry_Powdery_mildew
corn_Cercospora_leaf_spot_Gray_leaf_spot
corn_Common_rust
corn_healthy
corn_Northern_Leaf_Blight
cucumber_Downy_mildew
cucumber_healthy
grape_Black_rot
grape_Esca_(Black_Measles_or_Spanish_Measles)
grape_healthy
grape_Leaf_blight_(Isariopsis_Leaf_Spot)
peach_Bacterial_spot
peach_healthy
Pepper,_bell_Anthracnose
Pepper,_bell_Bacterial_spot
Pepper,_bell_Bacterial_wilt
Pepper,_bell_Cercospora_leaf_spot_(Frogeye_leaf_spot)
Pepper,_bell_healthy
potato_Early_blight
potato_healthy
potato_Late_blight
soybean_Downy_mildew
soybean_Frogeye_leaf_spot
soybean_healthy
soybean_Septoria_Leaf_Blight
squash_Alternaria_leaf_spot
squash_healthy
squash_Powdery_mildew
strawberry_healthy
strawberry_Leaf_scorch
tomato_Bacterial_spot
tom

In [15]:
for d in listdir:
    currTdir = targetdir + '/' + d
    currVdir = valdir + '/' + d
    print(currTdir, jpg_counts(currTdir))
    print(currVdir, jpg_counts(currVdir))

/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak/data/train/banana_Banana_speckle 2299
/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak/data/validation/banana_Banana_speckle 985
/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak/data/train/banana_Black_sigatoka_(Black_leaf_streak) 168
/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak/data/validation/banana_Black_sigatoka_(Black_leaf_streak) 72
/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak/data/train/banana_healthy 1151
/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak/data/validation/banana_healthy 492


In [18]:
from keras.applications import inception_v3

In [19]:
model = inception_v3.InceptionV3(include_top = False)

In [20]:
model.layers # all layers in model (in order)

[<keras.engine.topology.InputLayer at 0x1137eba90>,
 <keras.layers.convolutional.Convolution2D at 0x104d26438>,
 <keras.layers.normalization.BatchNormalization at 0x115537908>,
 <keras.layers.convolutional.Convolution2D at 0x1155a9550>,
 <keras.layers.normalization.BatchNormalization at 0x11570b518>,
 <keras.layers.convolutional.Convolution2D at 0x11576e6d8>,
 <keras.layers.normalization.BatchNormalization at 0x11579d6a0>,
 <keras.layers.pooling.MaxPooling2D at 0x11593fcf8>,
 <keras.layers.convolutional.Convolution2D at 0x1159d9d30>,
 <keras.layers.normalization.BatchNormalization at 0x1159fdb00>,
 <keras.layers.convolutional.Convolution2D at 0x115a6fc50>,
 <keras.layers.normalization.BatchNormalization at 0x115aadf60>,
 <keras.layers.pooling.MaxPooling2D at 0x115afb240>,
 <keras.layers.convolutional.Convolution2D at 0x115e85da0>,
 <keras.layers.normalization.BatchNormalization at 0x115ed07b8>,
 <keras.layers.convolutional.Convolution2D at 0x115bcecc0>,
 <keras.layers.convolutional.Con

In [21]:
from keras.models import Model

In [22]:
from keras.layers import GlobalAveragePooling2D, Dense
from keras.preprocessing.image import ImageDataGenerator

In [23]:
train_datagen = ImageDataGenerator(
                                    rotation_range=180,
                                    rescale = 1./255.,
                                    shear_range = .2,
                                    zoom_range = .2,
                                    horizontal_flip = True)

In [23]:
x = model.output
x = GlobalAveragePooling2D()(x)
x = Dense(100, activation='relu')(x) # optimize 100 here
predictions = Dense(20, activation = 'softmax', name='leaf_preds')(x) # 20 = number of classes

full_model = Model(model.input, predictions)

In [None]:
GlobalAveragePooling2D()

In [22]:
full_model.compile('rmsprop', 'categorical_crossentropy', metrics=['accuracy'])

(None, None, None, 3)

In [None]:
full_model.fit_generator

Inception model

In [None]:
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
from keras.optimizers import SGD, RMSprop

def jpg_counts(dirpath):
    from os import listdir, walk
    #from os.path import isfile, join
    
    # list of all subdirectories
    dirlist = [x[0] for x in walk(dirpath)][1:]
    
    # list of all images in this directory
    imagelist = [f for f in listdir(dirpath) if '.jpg' in f[-4:].lower()]
    
    # get all images in all subdirectories
    #print(dirlist)
    for currdir in dirlist:
        imagelist += [f for f in listdir(currdir) if '.jpg' in f[-4:].lower()]
    
    return len(imagelist)

def get_num_classes(dirpath, verbose=False):
    from os import walk
    
    # get list of all direct subdirectories
    dirlist = next(walk(dirpath))[1]
    
    if verbose:
        print('Classes found:')
        for d in dirlist:
            print(d)
    
    return len(dirlist)

'''
basedir = '/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak'
train_data_dir = basedir + '/data/train'
validation_data_dir = basedir + '/data/validation'
'''
basedir = '/data/data'
targetdir = basedir + '/train'
valdir = basedir + '/validation'


img_width, img_height = 761, 800
nb_train_samples = jpg_counts(targetdir)
nb_validation_samples = jpg_counts(valdir)
nb_categories = get_num_classes(targetdir)
nb_epoch = 1

# create the base pre-trained model
#base_model = InceptionV3(weights='imagenet', include_top=False)
model = InceptionV3(weights='imagenet', include_top=False)

# add a global spatial average pooling layer
#x = base_model.output
x = model.output
x = GlobalAveragePooling2D()(x)
# add a fully-connected layer
#x = Dense(1024, activation='relu', name='fc_1')(x)
x = Dense(100, activation='relu', name='fc_1')(x)
predictions = Dense(nb_categories, activation='softmax')(x)

fullmodel = Model(input=model.input, output=predictions)

# Freeze convolutional layers
for layer in base_model.layers:
    layer.trainable = False

#fullmodel.compile(optimizer=RMSprop(lr = .00001), loss = 'categorical_crossentropy', metrics=['accuracy'])
fullmodel.compile('rmsprop', 'categorical_crossentropy', metrics=['accuracy'])

train_datagen = ImageDataGenerator(rotation_range=180,
                                    rescale = 1./255.,
                                    shear_range = .2,
                                    zoom_range = .2,
                                    horizontal_flip = True)

# Inception has a custom image preprocess function
test_datagen = image.ImageDataGenerator(rescale=1./255)

generator_train = train_datagen.flow_from_directory(
        targetdir,
        target_size=(img_width, img_height),
        batch_size=32,
        class_mode='categorical')

generator_test = test_datagen.flow_from_directory(
        valdir,
        target_size=(img_width, img_height),
        batch_size=32,
        class_mode='categorical')

modelfull.fit_generator(generator_train,
            samples_per_epoch = nb_train_samples,
            nb_epoch = nb_epoch,
            validation_data = generator_test,
            nb_val_samples = nb_validation_samples)

#start fine-tuning
# unfreeze the top 2 inception blocks
for layer in fullmodel.layers[:172]:
   layer.trainable = False
for layer in fullmodel.layers[172:]:
   layer.trainable = True

# use SGD with a low learning rate
fullmodel.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
            loss='categorical_crossentropy', metrics=['accuracy'])

# Train the top 2 inception blocks
fullmodel.fit_generator(generator_train,
                        samples_per_epoch = nb_train_samples,
                        nb_epoch = nb_epoch,
                        validation_data = generator_test,
                        nb_val_samples = nb_validation_samples)

model_json = fullmodel.to_json()
with open('incep_3_multi.json', 'w') as json_file:
    json_file.write(model_json)
fullmodel.save_weights('incep_3_multi.h5')

Found 4033 images belonging to 4 classes.
Found 1725 images belonging to 4 classes.
Epoch 1/1


Predicting from sequential model

In [11]:
def jpg_counts(dirpath, verbose=False):
    from os import listdir, walk
    #from os.path import isfile, join
    
    # list of all subdirectories
    dirlist = [x[0] for x in walk(dirpath)][1:]
    
    # list of all images in this directory
    imagelist = [f for f in listdir(dirpath) if '.jpg' in f[-4:].lower()]
    if verbose:
        print(len(imagelist),'\n')
    
    # get all images in all subdirectories
    #print(dirlist)
    for currdir in dirlist:
        allfiles = [f for f in listdir(currdir)]
        imagelistsubdir = [f for f in listdir(currdir) if '.jpg' in f[-4:].lower()]
        imagelist += imagelistsubdir
        if verbose:
            if len(allfiles) != len(imagelistsubdir):
                print(currdir, len(imagelistsubdir), 'out of', len(allfiles), 'EXTRA NON JPG FILES')
            else:
                print(currdir, len(imagelistsubdir), 'out of', len(allfiles))

    if verbose:
        print(len(imagelist))
    return len(imagelist)

def get_num_classes(dirpath, verbose=False):
    from os import walk
    
    # get list of all direct subdirectories
    dirlist = next(walk(dirpath))[1]
    
    if verbose:
        print('Classes found:')
        for d in dirlist:
            print(d)
    
    return len(dirlist)

In [39]:
url = 'Soybean_test_1.JPG'

In [172]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
#from keras.backend import image_data_format
from keras import backend as K


def scale_images(dirpath, newsize = (299,299)):
    import numpy as np
    from scipy.misc import imresize, imread
    from os import listdir
    
    imagelist = [dirpath + '/' + f for f in listdir(dirpath) if '.jpg' in f[-4:].lower()]
    imgs = []
    
    imgs = [np.transpose(imresize(imread(img), newsize),
                        (0, 1, 2)).astype('float32')[:,:,:3] 
            for img in imagelist]
    # the (0,1,2) might not be necessary?
    # the [:,:,:3] is to prevent alpha channels from ruining the party
    
    return (np.array(imgs) / 255) , imagelist

def json_to_classes_list(dirpath, verbose=False):
    import json
    from pprint import pprint

    with open(dirpath) as json_data:
        classes = json.load(json_data)


    classes = {int(x):y for x,y in classes.items()}

    if verbose:
        pprint(classes)
        
    return classes

'''
img = load_img(url)  # this is a PIL image
x = img_to_array(img)  # this is a Numpy array with shape (3, 150, 150)
x = x.reshape((1,) + x.shape)  # this is a Numpy array with shape (1, 3, 150, 150)
'''

input_shape = (150, 150, 3)
num_categories = get_num_classes(targetdir)
print('Number of categories:', num_categories)
img_width, img_height = 150, 150
#classes = next(walk(targetdir))[1]
classes = json_to_classes_list('classeslist.json')


basedir = 'data'
#basedir = '/data/data2'
targetdir = basedir + '/train'
valdir = basedir + '/validation'
preddir = 'pred'



# IMAGE PROCESSING

'''
pred_datagen = ImageDataGenerator(rescale=1./255)

pred_generator = pred_datagen.flow_from_directory(
        preddir,
        target_size=(img_width, img_height),
        batch_size=32,
        class_mode='categorical')
'''
X, imglist = scale_images(preddir, newsize = (150,150))


# MODEL ARCHITECTURE

model = Sequential()
#model.add(Conv2D(32, (3, 3), input_shape=(3, 150, 150)))
model.add(Conv2D(32, 3, 3, input_shape=input_shape))
model.add(Activation('relu'))
#model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(MaxPooling2D(pool_size=(2, 2)))

#model.add(Conv2D(32, (3, 3)))
model.add(Conv2D(32, 3, 3))
model.add(Activation('relu'))
#model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(MaxPooling2D(pool_size=(2, 2)))

#model.add(Conv2D(64, (3, 3)))
model.add(Conv2D(64, 3, 3))
model.add(Activation('relu'))
#model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
#model.add(Dense(1))
model.add(Dense(num_categories)) # number of categories
#model.add(Activation('sigmoid'))
model.add(Activation('softmax')) # for multiclass

model.load_weights('vgg16-1.h5')

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

'''
model = Model(input= (base_model.input), output= (top_model(base_model.output)))
'''

preds = model.predict(X)

for i, pred in enumerate(preds):
    j = pred.argmax()
    print('Top prediction for', imglist[i], 'is:', classes[j], 'at', pred[j])

Number of categories: 46
Top prediction for pred/Apple-Apple_scab-00008.JPG is: tomato_Leaf_Mold at 0.603696
Top prediction for pred/Apple-healthy-01257.JPG is: Pepper,_bell_Bacterial_spot at 0.338878
Top prediction for pred/Corn-healthy-00518.jpg is: banana_healthy at 0.438868
Top prediction for pred/Corn-Northern_Leaf_Blight-02879.JPG is: soybean_Frogeye_leaf_spot at 0.439087
Top prediction for pred/corn_northernleafblight_test_1.jpg is: apple_Black_rot at 0.998851
Top prediction for pred/corn_northernleafblight_test_2.jpg is: Pepper,_bell_healthy at 0.891833
Top prediction for pred/soybean_test_1.JPG is: soybean_Septoria_Leaf_Blight at 0.896122
Top prediction for pred/Squash-Powdery_mildew-00006.JPG is: strawberry_Leaf_scorch at 0.568302
Top prediction for pred/Strawberry-healthy-01120.JPG is: tomato_Target_Spot at 0.418308
Top prediction for pred/Strawberry-Leaf_scorch-00009.JPG is: squash_healthy at 0.817026
Top prediction for pred/Tomato-healthy-00458.JPG is: apple_Apple_scab at 

In [111]:
def scale_images(dirpath, newsize = (299,299)):
    import numpy as np
    from scipy.misc import imresize, imread
    from os import listdir
    
    imagelist = [dirpath + '/' + f for f in listdir(dirpath) if '.jpg' in f[-4:].lower()]
    imgs = []
    
    '''
    for img in imagelist:
        #print(img)
        x = imread(img)
        #print(x.shape)
        x = imresize(x, newsize)
        #print(x.shape)
        #print(x)
        x = np.transpose(x, (0,1,2)).astype('float32')
        #print(x.shape)
        x = x[:,:,:3]
        imgs.append(x)
    '''
    ''' Doesn't work with images that have an alpha channel
    imgs = [np.transpose(imresize(imread(img), newsize),
                        (0, 1, 2)).astype('float32') 
            for img in imagelist] # the (0,1,2) might not be necessary?
    '''
    
    imgs = [np.transpose(imresize(imread(img), newsize),
                        (0, 1, 2)).astype('float32')[:,:,:3] 
            for img in imagelist]
    # the (0,1,2) might not be necessary?
    # the [:,:,:3] is to prevent alpha channels from ruining the party
    
    return (np.array(imgs) / 255) , imagelist

In [112]:
preddir = 'pred'
newsize = (150,150)

In [113]:
scale_images(preddir,newsize=newsize)[0].shape

(2, 150, 150, 3)

In [None]:
img = image.load_img(img_path, target_size=(299, 299))

Predicting from InceptionV3 model

In [12]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.models import load_model

# from keras.models import Sequential
# from keras.layers import Conv2D, MaxPooling2D
# from keras.layers import Activation, Dropout, Flatten, Dense
# from keras import backend as K
#from keras.backend import image_data_format


def scale_images(dirpath, newsize = (299,299)):
    import numpy as np
    from scipy.misc import imresize, imread
    from os import listdir
    
    imagelist = [dirpath + '/' + f for f in listdir(dirpath) if '.jpg' in f[-4:].lower()]
    
    imgs = [imresize(imread(img), newsize).astype('float32')[:,:,:3] 
            for img in imagelist]
    # the [:,:,:3] is to prevent alpha channels from ruining the party
    
    return (np.array(imgs) / 255) , imagelist

def json_to_classes_list(dirpath, verbose=False):
    import json
    from pprint import pprint

    with open(dirpath) as json_data:
        classes = json.load(json_data)


    classes = {int(x):y for x,y in classes.items()}

    if verbose:
        pprint(classes)
        
    return classes

'''
img = load_img(url)  # this is a PIL image
x = img_to_array(img)  # this is a Numpy array with shape (3, 150, 150)
x = x.reshape((1,) + x.shape)  # this is a Numpy array with shape (1, 3, 150, 150)
'''

input_shape = (299, 299, 3)
num_categories = get_num_classes(targetdir)
print('Number of categories:', num_categories)
img_width, img_height = 299, 299
#classes = next(walk(targetdir))[1]
classes = json_to_classes_list('classeslist.json')


basedir = 'data'
#basedir = '/data/data2'
targetdir = basedir + '/train'
valdir = basedir + '/validation'
preddir = 'pred'



# IMAGE PROCESSING

X, imglist = scale_images(preddir)

# LOADING MODEL + MODEL ARCHITECTURE

modelloc = 'leafincepmodel-ft-3.h5'

model = load_model(modelloc)

'''
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
'''

'''
model = Model(input= (base_model.input), output= (top_model(base_model.output)))
'''



# for i, pred in enumerate(preds):
#     top3 = pred.argsort()[::-1][:3]
#     #j = pred.argmax()
#     print('Top prediction/s for', imglist[i], ':')
#     print('\t(1)', classes[top3[0]], 'at', pred[top3[0]])
#     print('\t(2)', classes[top3[1]], 'at', pred[top3[1]])
#     print('\t(3)', classes[top3[2]], 'at', pred[top3[2]])
#     print()

Number of categories: 46


'\nmodel = Model(input= (base_model.input), output= (top_model(base_model.output)))\n'

Predict plant (look for first "_") in prediction, set threshold to 70%?
Predict plant health/disease (output top three in that plant category if above predicted)

In [16]:
X, imglist = scale_images(preddir)
preds = model.predict(X)
for i, pred in enumerate(preds):
    top3 = pred.argsort()[::-1][:3]
    #j = pred.argmax()
    print('Top prediction/s for', imglist[i], ':')
    print('\t(1)', classes[top3[0]], 'at', pred[top3[0]])
    print('\t(2)', classes[top3[1]], 'at', pred[top3[1]])
    print('\t(3)', classes[top3[2]], 'at', pred[top3[2]])
    print()

Top prediction/s for pred/Apple-Black_rot-00639.JPG :
	(1) apple_Black_rot at 0.988216
	(2) peach_Bacterial_spot at 0.0099152
	(3) apple_Apple_scab at 0.00170946

Top prediction/s for pred/Apple-Black_rot-00673.JPG :
	(1) apple_Black_rot at 0.559321
	(2) apple_Apple_scab at 0.275908
	(3) squash_Powdery_mildew at 0.0874202

Top prediction/s for pred/Banana-Banana_speckle-05113.JPG :
	(1) banana_Banana_speckle at 0.598356
	(2) banana_healthy at 0.401183
	(3) banana_Black_sigatoka_(Black_leaf_streak) at 0.000458387

Top prediction/s for pred/Banana-Banana_speckle-05134.JPG :
	(1) banana_Banana_speckle at 0.974927
	(2) banana_healthy at 0.0249178
	(3) banana_Black_sigatoka_(Black_leaf_streak) at 0.00013229

Top prediction/s for pred/Banana-Banana_speckle-05151.JPG :
	(1) banana_Banana_speckle at 0.999971
	(2) banana_Black_sigatoka_(Black_leaf_streak) at 2.83259e-05
	(3) banana_healthy at 4.23629e-07

Top prediction/s for pred/Cabbage-Black_rot-00024.JPG :
	(1) cabbage_Black_rot at 0.999107

In [17]:
X, imglist = scale_images(preddir)
preds = model.predict(X)
for i, pred in enumerate(preds):
    top3 = pred.argsort()[::-1][:3]
    #j = pred.argmax()
    print('Top prediction/s for', imglist[i], ':')
    print('\t(1)', classes[top3[0]], 'at', pred[top3[0]])
    print('\t(2)', classes[top3[1]], 'at', pred[top3[1]])
    print('\t(3)', classes[top3[2]], 'at', pred[top3[2]])
    print()

Top prediction/s for pred/Tomato-Bacterial_spot-01592.JPG :
	(1) tomato_Bacterial_spot at 0.987302
	(2) tomato_Septoria_leaf_spot at 0.00839303
	(3) tomato_Early_blight at 0.00406035

Top prediction/s for pred/Tomato-Bacterial_spot-01593.JPG :
	(1) tomato_Bacterial_spot at 0.939456
	(2) tomato_Early_blight at 0.0518083
	(3) tomato_Septoria_leaf_spot at 0.00463675

Top prediction/s for pred/Tomato-Bacterial_spot-01594.JPG :
	(1) tomato_Bacterial_spot at 0.990683
	(2) tomato_Septoria_leaf_spot at 0.00736429
	(3) tomato_Tomato_Yellow_Leaf_Curl_disease at 0.00118015

Top prediction/s for pred/Tomato-Bacterial_spot-01597.JPG :
	(1) tomato_Tomato_Yellow_Leaf_Curl_disease at 0.449069
	(2) tomato_Late_blight at 0.374783
	(3) tomato_Bacterial_spot at 0.144853

Top prediction/s for pred/Tomato-Bacterial_spot-01598.JPG :
	(1) tomato_Tomato_Yellow_Leaf_Curl_disease at 0.864269
	(2) tomato_Late_blight at 0.0678573
	(3) tomato_Bacterial_spot at 0.0665755

Top prediction/s for pred/Tomato-Bacterial_s

In [22]:
X, imglist = scale_images(preddir)
preds = model.predict(X)
for i, pred in enumerate(preds):
    top3 = pred.argsort()[::-1][:3]
    #j = pred.argmax()
    print('Top prediction/s for', imglist[i], ':')
    print('\t(1)', classes[top3[0]], 'at', pred[top3[0]])
    print('\t(2)', classes[top3[1]], 'at', pred[top3[1]])
    print('\t(3)', classes[top3[2]], 'at', pred[top3[2]])
    print()

(10, 299, 299, 3)
Top prediction/s for pred/Apple-01.JPG :
	(1) apple_healthy at 0.998036
	(2) apple_Black_rot at 0.00186758
	(3) grape_healthy at 5.92782e-05

Top prediction/s for pred/Grape-01.JPG :
	(1) grape_healthy at 0.650656
	(2) apple_healthy at 0.331087
	(3) apple_Black_rot at 0.0151621

Top prediction/s for pred/Strawberry-01.JPG :
	(1) strawberry_healthy at 0.999998
	(2) strawberry_Leaf_scorch at 1.60822e-06
	(3) tomato_Late_blight at 1.94541e-07

Top prediction/s for pred/Strawberry-02.JPG :
	(1) strawberry_Leaf_scorch at 0.999986
	(2) grape_Esca_(Black_Measles_or_Spanish_Measles) at 1.23321e-05
	(3) tomato_Septoria_leaf_spot at 4.94293e-07

Top prediction/s for pred/Strawberry-03.JPG :
	(1) strawberry_Leaf_scorch at 0.985764
	(2) tomato_Tomato_Yellow_Leaf_Curl_disease at 0.0126932
	(3) potato_Early_blight at 0.000900258

Top prediction/s for pred/Strawberry-04.JPG :
	(1) strawberry_Leaf_scorch at 0.999787
	(2) tomato_Early_blight at 0.00013004
	(3) tomato_Septoria_leaf_spo

In [183]:
for i in classes:
    print(classes[i])

Pepper,_bell_Bacterial_spot
Pepper,_bell_healthy
apple_Apple_scab
apple_Black_rot
apple_Cedar_apple_rust
apple_healthy
banana_Banana_speckle
banana_Black_sigatoka_(Black_leaf_streak)
banana_healthy
cabbage_Black_rot
cabbage_healthy
cherry_Powdery_mildew
cherry_healthy
corn_Cercospora_leaf_spot_Gray_leaf_spot
corn_Common_rust
corn_Northern_Leaf_Blight
corn_healthy
cucumber_Downy_mildew
cucumber_healthy
grape_Black_rot
grape_Esca_(Black_Measles_or_Spanish_Measles)
grape_Leaf_blight_(Isariopsis_Leaf_Spot)
grape_healthy
peach_Bacterial_spot
peach_healthy
potato_Early_blight
potato_Late_blight
potato_healthy
soybean_Downy_mildew
soybean_Frogeye_leaf_spot
soybean_Septoria_Leaf_Blight
soybean_healthy
squash_Powdery_mildew
squash_healthy
strawberry_Leaf_scorch
strawberry_healthy
tomato_Bacterial_spot
tomato_Early_blight
tomato_Late_blight
tomato_Leaf_Mold
tomato_Septoria_leaf_spot
tomato_Spider_mites??Two-spotted_spider_mite
tomato_Target_Spot
tomato_Tomato_Yellow_Leaf_Curl_disease
tomato_Toma

In [217]:
def parse_plant_name(plantstring, replace_underscores=None, remove_parentheses=False):
    
    ## basic parsing
    underscore_i = plantstring.find('_')
    plantsp = plantstring[:underscore_i]
    healthstatus = plantstring[underscore_i+1:]
    
    ## special cases
    # special case for bell peppers (2 cases)
    if plantsp == 'Pepper,':
        plantsp = 'bell_pepper'
        underscore_i = healthstatus.find('_')
        healthstatus = healthstatus[underscore_i+1:]
    
    # special case for 'tomato_Spider_mites??Two-spotted_spider_mite' (1 case)
    if healthstatus == 'Spider_mites??Two-spotted_spider_mite':
        healthstatus = 'Spider_mites_(Two-spotted_spider_mite)'
    
    ## extra parameters
    if replace_underscores != None:
        plantsp = plantsp.replace('_', replace_underscores)
        healthstatus = healthstatus.replace('_', replace_underscores)
        
    if remove_parentheses:
        plantsp = plantsp.replace('(','')
        plantsp = plantsp.replace(')','')
        healthstatus = healthstatus.replace('(','')
        healthstatus = healthstatus.replace(')','')
    
    return plantsp, healthstatus
    

In [218]:
parse_plant_name('tomato_Spider_mites??Two-spotted_spider_mite')

('tomato', 'Spider_mites_Two-spotted_spider_mite')

In [219]:
for i in classes:
    print(parse_plant_name(classes[i]))

('bell_pepper', 'Bacterial_spot')
('bell_pepper', 'healthy')
('apple', 'Apple_scab')
('apple', 'Black_rot')
('apple', 'Cedar_apple_rust')
('apple', 'healthy')
('banana', 'Banana_speckle')
('banana', 'Black_sigatoka_(Black_leaf_streak)')
('banana', 'healthy')
('cabbage', 'Black_rot')
('cabbage', 'healthy')
('cherry', 'Powdery_mildew')
('cherry', 'healthy')
('corn', 'Cercospora_leaf_spot_Gray_leaf_spot')
('corn', 'Common_rust')
('corn', 'Northern_Leaf_Blight')
('corn', 'healthy')
('cucumber', 'Downy_mildew')
('cucumber', 'healthy')
('grape', 'Black_rot')
('grape', 'Esca_(Black_Measles_or_Spanish_Measles)')
('grape', 'Leaf_blight_(Isariopsis_Leaf_Spot)')
('grape', 'healthy')
('peach', 'Bacterial_spot')
('peach', 'healthy')
('potato', 'Early_blight')
('potato', 'Late_blight')
('potato', 'healthy')
('soybean', 'Downy_mildew')
('soybean', 'Frogeye_leaf_spot')
('soybean', 'Septoria_Leaf_Blight')
('soybean', 'healthy')
('squash', 'Powdery_mildew')
('squash', 'healthy')
('strawberry', 'Leaf_sco

Python script for web app

In [21]:
## Imports
from keras.preprocessing.image import load_img
from keras.models import load_model

import numpy as np
from scipy.misc import imresize, imread
from os import listdir

import json
from pprint import pprint

## Helper fxns

def scale_images(dirpath, newsize = (299,299)):
    imagelist = [dirpath + '/' + f for f in listdir(dirpath) if '.jpg' in f[-4:].lower()]
    
    imgs = [imresize(imread(img), newsize).astype('float32')[:,:,:3] 
            for img in imagelist]
    # the [:,:,:3] is to prevent alpha channels from ruining the party
    
    print(np.array(imgs).shape)
    
    
    return (np.array(imgs) / 255) , imagelist

def json_to_classes_list(dirpath, verbose=False):
    with open(dirpath) as json_data:
        classes = json.load(json_data)

    classes = {int(x):y for x,y in classes.items()}

    if verbose:
        pprint(classes)
        
    return classes

def parse_plant_name(plantstring, replace_underscores=None, remove_parentheses=False):
    ## basic parsing
    underscore_i = plantstring.find('_')
    plantsp = plantstring[:underscore_i]
    healthstatus = plantstring[underscore_i+1:]
    
    ## special cases
    # special case for bell peppers (2 cases)
    if plantsp == 'Pepper,':
        plantsp = 'bell_pepper'
        underscore_i = healthstatus.find('_')
        healthstatus = healthstatus[underscore_i+1:]
    
    # special case for 'tomato_Spider_mites??Two-spotted_spider_mite' (1 case)
    if healthstatus == 'Spider_mites??Two-spotted_spider_mite':
        healthstatus = 'Spider_mites_(Two-spotted_spider_mite)'
    
    ## extra parameters
    if replace_underscores != None:
        plantsp = plantsp.replace('_', replace_underscores)
        healthstatus = healthstatus.replace('_', replace_underscores)
        
    if remove_parentheses:
        plantsp = plantsp.replace('(','')
        plantsp = plantsp.replace(')','')
        healthstatus = healthstatus.replace('(','')
        healthstatus = healthstatus.replace(')','')
    
    return plantsp, healthstatus




## LOADING MODEL + MODEL ARCHITECTURE

modelloc = 'leafincepmodel-ft-2.h5'

model = load_model(modelloc)

## IMAGE PROCESSING

#input_shape = (299, 299, 3)
img_width, img_height = 299, 299
classes = json_to_classes_list('classeslist.json')
preddir = 'pred'
X, imglist = scale_images(preddir)

## PREDICTIONS

preds = model.predict(X)
for i, pred in enumerate(preds):
    top3 = pred.argsort()[::-1][:3]
    print('Top prediction/s for', imglist[i], ':')
    print('\t(1)', classes[top3[0]], 'at', pred[top3[0]])
    print('\t(2)', classes[top3[1]], 'at', pred[top3[1]])
    print('\t(3)', classes[top3[2]], 'at', pred[top3[2]])
    print()



OSError: Unable to open file (Unable to open file: name = 'leafincepmodel-ft-2.h5', errno = 2, error message = 'no such file or directory', flags = 0, o_flags = 0)

In [None]:
from keras.models import load_model

modelloc = '~/model/model.h5'

model = load_model(modelloc)

In [3]:
from keras.models import load_model

modelloc = 'leafincepmodel-ft-3.h5'

fullmodel = load_model(modelloc)

In [4]:
basedir = '/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak'
targetdir = basedir + '/data/train'
valdir = basedir + '/data/validation'

In [7]:
from keras.preprocessing.image import ImageDataGenerator

img_width, img_height = 299, 299 # inception likes 299x299
batch_size = 32

test_datagen = ImageDataGenerator(rescale=1./255)

generator_test = test_datagen.flow_from_directory(
        valdir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='categorical')

Found 22099 images belonging to 46 classes.


In [8]:
fullmodel.evaluate_generator(generator_test, batch_size)

[0.028518684208393097, 1.0]

In [9]:
fullmodel.metrics_names

['loss', 'acc']

In [20]:
'{0:.2f}'.format(99.231341243)


'99.23'