In [60]:
import os, shutil
from keras import layers
from keras import models
from keras import optimizers
from keras.wrappers.scikit_learn import KerasClassifier
import math

In [61]:
#List of categories that will be considered. The items of this list have the same name as the names of the folders 
#the images are strored in.

list_categories = ['bar','bowling','buffet','casino','concert_hall','fastfood_restaurant','gameroom','gym',
                  'hairsalon','movietheater','restaurant','airport_inside','church_inside','cloister','elevator',
                  'florist','inside_bus','library','locker_room','museum','poolinside','prisoncell','subway',
                   'trainstation','waitingroom']

In [62]:
# Next we create the file structure to split the images in train, validation and test sets.


#Where the original images are stored.
original_dataset_dir = 'original_images'

#Where we will store the images separated by train, validation and test sets.
base_dir = 'images'

os.mkdir(base_dir)

train_dir = os.path.join(base_dir,'train')
os.mkdir(train_dir)

validation_dir = os.path.join(base_dir,'validation')
os.mkdir(validation_dir)

test_dir = os.path.join(base_dir,'test')
os.mkdir(test_dir)

In [63]:
#Inside each of the folders created peveously, we create a folder for each of the categories.

for i in range(len(list_categories)):
    os.mkdir(os.path.join(train_dir,list_categories[i]))
    
for i in range(len(list_categories)):
    os.mkdir(os.path.join(validation_dir,list_categories[i]))
    
for i in range(len(list_categories)):
    os.mkdir(os.path.join(test_dir,list_categories[i]))

In [64]:
#Next we divide the images into train(60%), validation(20%) and test(20%) and copy them to the new file structure.

for i in range(len(list_categories)):
    directory = os.path.join(original_dataset_dir,list_categories[i])
    number_of_files = sum(1 for item in os.listdir(directory) if os.path.isfile(os.path.join(directory, item)))
    train_indice = math.ceil(number_of_files*0.6)
    validation_indice = math.ceil(number_of_files*0.8)
    iterator = 0
    for file in os.listdir(directory):
        if iterator < train_indice:
            src = os.path.join(directory,file)
            dst = os.path.join(train_dir,list_categories[i],file)
            shutil.copyfile(src,dst)
        elif iterator < validation_indice:
            src = os.path.join(directory,file)
            dst = os.path.join(validation_dir,list_categories[i],file)
            shutil.copyfile(src,dst)
        else:
            src = os.path.join(directory,file)
            dst = os.path.join(test_dir,list_categories[i],file)
            shutil.copyfile(src,dst)
        iterator += 1

In [75]:
#Model definition

def build_classifier():

    model = models.Sequential()

    model.add(layers.Conv2D(32, (3,3), activation='relu', input_shape=(150,150,3)))
    model.add(layers.MaxPooling2D((2,2)))

    model.add(layers.Conv2D(64, (3,3), activation='relu'))
    model.add(layers.MaxPooling2D((2,2)))

    model.add(layers.Conv2D(128, (3,3), activation='relu'))
    model.add(layers.MaxPooling2D((2,2)))

    model.add(layers.Conv2D(128, (3,3), activation='relu'))
    model.add(layers.MaxPooling2D((2,2)))

    model.add(layers.Flatten())
    
    model.add(layers.Dropout(0.5))

    model.add(layers.Dense(512,activation='relu'))

    model.add(layers.Dense(25,activation='sigmoid'))
    
    #Model Compilation
    model.compile(loss='categorical_crossentropy',
             optimizer=optimizers.RMSprop(lr=1e-4),
             metrics=['acc'])
    
    return model

model = build_classifier()

In [76]:
#Data genaration for the training and validation steps

from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(150,150),
        batch_size=20,
        class_mode='categorical')

validation_generator = train_datagen.flow_from_directory(
        validation_dir,
        target_size=(150,150),
        batch_size=20,
        class_mode='categorical')

Found 3492 images belonging to 25 classes.
Found 1163 images belonging to 25 classes.


In [77]:
for data_batch, labels_batch in train_generator:
    print('data batch shape: ', data_batch.shape)
    print('labels batch shape: ', labels_batch.shape)
    break

data batch shape:  (20, 150, 150, 3)
labels batch shape:  (20, 25)


In [None]:
#Fitting the model:

history = model.fit_generator(
    train_generator,
    steps_per_epoch=150,
    epochs=30,
    validation_data=validation_generator,
    validation_steps=50)

In [43]:
#Data generation for the test step:

test_set = test_datagen.flow_from_directory(test_dir,
                                            target_size = (150, 150),
                                            batch_size = 20,
                                            class_mode = 'categorical')

Found 1150 images belonging to 25 classes.


In [44]:
#Evaluate model

model.evaluate_generator(test_set,58)

[5.6572418212890625, 0.26608696579933167]

In [79]:
#Next we will try to use the same model, but with the use of Data Augmentation:

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(150,150),
        batch_size=32,
        class_mode='categorical')

validation_generator = train_datagen.flow_from_directory(
        validation_dir,
        target_size=(150,150),
        batch_size=32,
        class_mode='categorical')

Found 3492 images belonging to 25 classes.
Found 1163 images belonging to 25 classes.


In [80]:
#Fitting the model:

history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=100,
    validation_data=validation_generator,
    validation_steps=50)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [81]:
#Data generation for the test step:

test_set = test_datagen.flow_from_directory(test_dir,
                                            target_size = (150, 150),
                                            batch_size = 20,
                                            class_mode = 'categorical')

Found 1150 images belonging to 25 classes.


In [82]:
#Evaluate model

model.evaluate_generator(test_set,58)

[1.7866178750991821, 0.4182608723640442]