Getting the list of categories, then saving and exporting it to a local file

In [1]:
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
from keras.optimizers import SGD, RMSprop

# gets jpg count for all images in given directory and all subdirectories
def jpg_counts(dirpath, verbose=False):
    """Count the ``.jpg`` files in ``dirpath`` and all of its subdirectories.

    A name counts when its last four characters are ``.jpg`` in any letter
    case (so ``.jpeg`` is not counted) and it refers to a file; a directory
    that merely carries such a name is ignored.

    Args:
        dirpath: Root directory to scan.
        verbose: When true, print the count for the root directory, one line
            per subdirectory (flagging those that hold entries which are not
            jpg files), and finally the grand total.

    Returns:
        int: Total number of jpg files found.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``dirpath`` (or a
            subdirectory found by the walk) cannot be listed.
    """
    from os import listdir, walk
    from os.path import isfile, join

    def scan(folder):
        # List the folder once and return (jpg file count, total entry count).
        entries = listdir(folder)
        jpgs = sum(
            1 for name in entries
            if name[-4:].lower() == '.jpg' and isfile(join(folder, name))
        )
        return jpgs, len(entries)

    # images that sit directly inside the root directory
    total, _ = scan(dirpath)
    if verbose:
        print(total, '\n')

    # walk() yields the root itself first; [1:] keeps only the subdirectories
    subdirs = [entry[0] for entry in walk(dirpath)][1:]
    for currdir in subdirs:
        found, entry_count = scan(currdir)
        total += found
        if verbose:
            if entry_count != found:
                print(currdir, found, 'out of', entry_count, 'EXTRA NON JPG FILES')
            else:
                print(currdir, found, 'out of', entry_count)

    if verbose:
        print(total)
    return total

def get_num_classes(dirpath, verbose=False):
    """Count the immediate subdirectories of ``dirpath``, treated as classes.

    Only direct children are considered; nested directories and plain files
    are not counted.

    Args:
        dirpath: Directory whose direct subdirectories are counted.
        verbose: When true, print a header followed by each subdirectory name.

    Returns:
        int: Number of direct subdirectories.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``dirpath`` does not
            exist or cannot be listed (e.g. ``FileNotFoundError``).
    """
    from os import listdir
    from os.path import isdir, join

    # listdir() fails with a descriptive OSError on a bad path, whereas
    # next(walk(dirpath)) would leak a bare StopIteration to the caller.
    class_dirs = [name for name in listdir(dirpath) if isdir(join(dirpath, name))]

    if verbose:
        print('Classes found:')
        for name in class_dirs:
            print(name)

    return len(class_dirs)

Using TensorFlow backend.


In [2]:
# Cell purpose: build an InceptionV3-based classifier plus the train and
# validation image generators, then start fitting. Later cells only read
# generator_train.class_indices.

# Unused string literal: previous local-machine paths, kept for reference.
'''
basedir = '/Users/ggarbagnati/ds/metis/metisgh/sf17_ds5/local/Projects/05-Kojak'
train_data_dir = basedir + '/data/train'
validation_data_dir = basedir + '/data/validation'
'''
# Dataset root; the train and validation splits live in sibling subfolders.
basedir = '/data/data'
targetdir = basedir + '/train'
valdir = basedir + '/validation'


#img_width, img_height = 761, 800
img_width, img_height = 299, 299 # inception likes 299x299
# Sample counts are the number of .jpg files under each split; the class
# count is the number of immediate subdirectories of the training folder.
nb_train_samples = jpg_counts(targetdir)
nb_validation_samples = jpg_counts(valdir)
nb_categories = get_num_classes(targetdir)
batch_size = 32
nb_epoch = 1

# create the base pre-trained model
#base_model = InceptionV3(weights='imagenet', include_top=False)
model = InceptionV3(weights='imagenet', include_top=False)

# add a global spatial average pooling layer
#x = base_model.output
x = model.output
x = GlobalAveragePooling2D()(x)
# add a fully-connected layer
#x = Dense(1024, activation='relu', name='fc_1')(x)
x = Dense(1024, activation='relu', name='fc_1')(x) # num of neurons in the layer
# output layer: one softmax unit per class directory found above
predictions = Dense(nb_categories, activation='softmax')(x)

# Unused string literal: an earlier position of the freeze loop (not executed).
'''
# Freeze convolutional layers
for layer in model.layers:
    layer.trainable = False
'''

# Full network: the InceptionV3 base plus the pooling/dense head added above.
# NOTE(review): the `input=`/`output=` keyword names look like the older Keras
# spelling - confirm against the installed Keras version.
fullmodel = Model(input=model.input, output=predictions)


# Freeze convolutional layers
# The loop runs over the layers of the InceptionV3 base (`model`), and the
# flags are set before compile() is called below.
for layer in model.layers:
    layer.trainable = False


fullmodel.compile(optimizer=RMSprop(lr = .00001), loss = 'categorical_crossentropy', metrics=['accuracy'])
#fullmodel.compile('rmsprop', 'categorical_crossentropy', metrics=['accuracy'])

# Training images are rescaled by 1/255 and augmented with rotation, shear,
# zoom and horizontal flips.
train_datagen = ImageDataGenerator(rotation_range=180,
                                    rescale = 1./255.,
                                    shear_range = .2,
                                    zoom_range = .2,
                                    horizontal_flip = True)

# Inception has a custom image preprocess function
# NOTE(review): despite the remark above, both generators only apply the
# 1/255 rescale; no Inception-specific preprocessing function is passed here.
# Confirm that this is intended.
test_datagen = image.ImageDataGenerator(rescale=1./255)

# Both generators read one subfolder per class and yield one-hot
# ('categorical') labels in batches of `batch_size`.
generator_train = train_datagen.flow_from_directory(
        targetdir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='categorical')

generator_test = test_datagen.flow_from_directory(
        valdir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='categorical')

# interrupt kernel after the fit_generator gets a few seconds into the first epoch
# (if it is not interrupted, the call trains for nb_epoch epoch(s) before
# 'Done!' is printed)
# NOTE(review): samples_per_epoch / nb_epoch / nb_val_samples look like the
# older Keras keyword names - confirm against the installed Keras version.

fullmodel.fit_generator(generator_train,
            samples_per_epoch = nb_train_samples,
            nb_epoch = nb_epoch,
            validation_data = generator_test,
            nb_val_samples = nb_validation_samples)


# if I get time...
# Unused string literal: fine-tuning plan (unfreeze layers from index 172 on,
# recompile with SGD, fit again). Not executed.
'''
#start fine-tuning
# unfreeze the top 2 inception blocks
for layer in fullmodel.layers[:172]:
   layer.trainable = False
for layer in fullmodel.layers[172:]:
   layer.trainable = True

# use SGD with a low learning rate
fullmodel.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
            loss='categorical_crossentropy', metrics=['accuracy'])

# Train the top 2 inception blocks
fullmodel.fit_generator(generator_train,
                        samples_per_epoch = nb_train_samples,
                        nb_epoch = nb_epoch,
                        validation_data = generator_test,
                        nb_val_samples = nb_validation_samples)
'''

#fullmodel.save('leafincepmodel.h5')

# Unused string literal: saving the architecture as JSON and the weights as
# HDF5. Not executed.
'''
model_json = fullmodel.to_json()
with open('incep_3_multi.json', 'w') as json_file:
    json_file.write(model_json)
fullmodel.save_weights('incep_3_multi.h5')
'''

print('Done!')


Found 51624 images belonging to 46 classes.
Found 22100 images belonging to 46 classes.
Epoch 1/1
  320/51624 [..............................] - ETA: 1527s - loss: 3.8827 - acc: 0.0344

KeyboardInterrupt: 

In [5]:
# Mapping exposed by the training generator: class (folder) name -> integer index.
classeslist = generator_train.class_indices
print(type(classeslist))
# Bare last expression so the notebook displays the mapping.
classeslist

<class 'dict'>


{'Pepper,_bell_Bacterial_spot': 0,
 'Pepper,_bell_healthy': 1,
 'apple_Apple_scab': 2,
 'apple_Black_rot': 3,
 'apple_Cedar_apple_rust': 4,
 'apple_healthy': 5,
 'banana_Banana_speckle': 6,
 'banana_Black_sigatoka_(Black_leaf_streak)': 7,
 'banana_healthy': 8,
 'cabbage_Black_rot': 9,
 'cabbage_healthy': 10,
 'cherry_Powdery_mildew': 11,
 'cherry_healthy': 12,
 'corn_Cercospora_leaf_spot_Gray_leaf_spot': 13,
 'corn_Common_rust': 14,
 'corn_Northern_Leaf_Blight': 15,
 'corn_healthy': 16,
 'cucumber_Downy_mildew': 17,
 'cucumber_healthy': 18,
 'grape_Black_rot': 19,
 'grape_Esca_(Black_Measles_or_Spanish_Measles)': 20,
 'grape_Leaf_blight_(Isariopsis_Leaf_Spot)': 21,
 'grape_healthy': 22,
 'peach_Bacterial_spot': 23,
 'peach_healthy': 24,
 'potato_Early_blight': 25,
 'potato_Late_blight': 26,
 'potato_healthy': 27,
 'soybean_Downy_mildew': 28,
 'soybean_Frogeye_leaf_spot': 29,
 'soybean_Septoria_Leaf_Blight': 30,
 'soybean_healthy': 31,
 'squash_Powdery_mildew': 32,
 'squash_healthy': 33

In [9]:
# Invert the mapping so it reads integer index -> class name.
classeslist_rev = dict((index, label) for label, index in classeslist.items())
classeslist_rev

{0: 'Pepper,_bell_Bacterial_spot',
 1: 'Pepper,_bell_healthy',
 2: 'apple_Apple_scab',
 3: 'apple_Black_rot',
 4: 'apple_Cedar_apple_rust',
 5: 'apple_healthy',
 6: 'banana_Banana_speckle',
 7: 'banana_Black_sigatoka_(Black_leaf_streak)',
 8: 'banana_healthy',
 9: 'cabbage_Black_rot',
 10: 'cabbage_healthy',
 11: 'cherry_Powdery_mildew',
 12: 'cherry_healthy',
 13: 'corn_Cercospora_leaf_spot_Gray_leaf_spot',
 14: 'corn_Common_rust',
 15: 'corn_Northern_Leaf_Blight',
 16: 'corn_healthy',
 17: 'cucumber_Downy_mildew',
 18: 'cucumber_healthy',
 19: 'grape_Black_rot',
 20: 'grape_Esca_(Black_Measles_or_Spanish_Measles)',
 21: 'grape_Leaf_blight_(Isariopsis_Leaf_Spot)',
 22: 'grape_healthy',
 23: 'peach_Bacterial_spot',
 24: 'peach_healthy',
 25: 'potato_Early_blight',
 26: 'potato_Late_blight',
 27: 'potato_healthy',
 28: 'soybean_Downy_mildew',
 29: 'soybean_Frogeye_leaf_spot',
 30: 'soybean_Septoria_Leaf_Blight',
 31: 'soybean_healthy',
 32: 'squash_Powdery_mildew',
 33: 'squash_healthy'

In [10]:
import json

# Serialise exactly once, before the file is opened, so a serialisation
# failure cannot leave a truncated classeslist.json behind.
# Note: JSON object keys are always strings, so the integer indices are
# written as "0", "1", ... and need int() when the file is loaded again.
classes_json = json.dumps(classeslist_rev)

with open('classeslist.json', 'w') as f:
    f.write(classes_json)
    