# Chapter 2 Image classification

## Training MNIST model in Keras

### Preparing the dataset


In [2]:
import keras

Using TensorFlow backend.


In [0]:
batch_size = 128 
no_classes = 10
epochs = 2
image_height, image_width = 28,28

In [31]:
## Loading the dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [0]:
### Formatting the data to make each pixel and individual array adding that way
###  other dimension to the data
x_train = x_train.reshape(x_train.shape[0], image_height, image_width, 1)

x_test = x_test.reshape(x_test.shape[0], image_height, image_width, 1)
input_shape = (image_height, image_width, 1)

In [0]:
### Converting integers from 0 to 255 to float allowing division
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')


In [0]:
### normalizing data to be from 0 to 1 to make it easier to train
x_train /= 255
x_test /= 255

In [0]:
#print(x_train[0][12])
print(y_test)

[7 2 1 ... 4 5 6]


In [0]:
### making one shoot encoding classes
y_train = keras.utils.to_categorical(y_train, no_classes)
y_test = keras.utils.to_categorical(y_test, no_classes)

In [0]:
def simple_cnn(input_shape):
    model = keras.models.Sequential()
    model.add(keras.layers.Conv2D(filters = 64,
                                  kernel_size= (3,3),
                                 activation= 'relu',
                                 input_shape = input_shape)
             )
    model.add(keras.layers.Conv2D(filters = 128,
                                 kernel_size= (3,3),
                                  activation = 'relu'
                                 )
             )
    model.add( keras.layers.MaxPooling2D(pool_size = (2,2) ) )
    model.add( keras.layers.Dropout(rate = 0.3))
    model.add( keras.layers.Flatten()) ##Make it one dimension
    model.add( keras.layers.Dense(units= 1024, activation= 'relu'))
    model.add( keras.layers.Dropout(rate= 0.3) )
    model.add( keras.layers.Dense(units= no_classes, activation='softmax'))
    model.compile(loss= keras.losses.categorical_crossentropy,
                  optimizer = keras.optimizers.Adam(),
                  metrics = ['accuracy']
                )
    return model

simple_cnn_model = simple_cnn(input_shape)
    

In [0]:
#print((x_test, y_test))

In [14]:
simple_cnn_model.fit(x_train, y_train, batch_size, epochs, verbose=1, validation_data= (x_test, y_test))

Train on 60000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f070050f438>

In [15]:
train_loss, train_accuracy  = simple_cnn_model.evaluate(x_train, y_train,
                                                        verbose= 0)
print("Train data loss:" , train_loss)
print("train data accuracy:", train_accuracy)

Train data loss: 0.007419397556534144
train data accuracy: 0.99795


In [16]:
test_loss, test_accuracy = simple_cnn_model.evaluate(x_test, y_test, verbose= 0)

print("Test data loss:" , test_loss)
print("Test data accuracy:", test_accuracy)

Test data loss: 0.03336867708020873
Test data accuracy: 0.9897


## Training model cats vs dogs

### Downloading the data

In [1]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"dasalgadob","key":"cc827058d8ada3fe821c934bbb1f7e7e"}'}

In [0]:
!pip install -q kaggle

In [0]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

In [4]:
!kaggle competitions download -c dogs-vs-cats

Downloading sampleSubmission.csv to /content
  0% 0.00/86.8k [00:00<?, ?B/s]
100% 86.8k/86.8k [00:00<00:00, 26.8MB/s]
Downloading test1.zip to /content
 98% 265M/271M [00:05<00:00, 33.4MB/s]
100% 271M/271M [00:05<00:00, 48.3MB/s]
Downloading train.zip to /content
100% 542M/543M [00:07<00:00, 63.8MB/s]
100% 543M/543M [00:07<00:00, 79.2MB/s]


In [3]:
!ls 

data	     sample_data	   test1      train
kaggle.json  sampleSubmission.csv  test1.zip  train.zip


#### Unzipping data

In [0]:
!unzip train.zip > /dev/null

In [0]:
!unzip test1.zip > /dev/null

### Preparing data

In [0]:
import os
import shutil

In [0]:
work_dir = ''

In [0]:
#print(os.listdir(os.path.join(work_dir, 'train')))
image_names = sorted(os.listdir(os.path.join(work_dir, 'train')))
#print(image_names)

In [0]:
def copy_files(prefix_str, range_start, range_end, target_dir):
    image_paths = [os.path.join(work_dir, 'train', prefix_str + '.' 
                                + str(i) + '.jpg' )
                  for i in range(range_start, range_end)]
    dest_dir = os.path.join(work_dir, 'data', target_dir, prefix_str)
    os.makedirs(dest_dir)
    for image_path in image_paths:
        shutil.copy(image_path, dest_dir)

In [0]:
## copy files with subfolders for dogs and cats
def copy_files2(prefix_str, range_start, range_end, target_dir):
    image_paths = [os.path.join(work_dir, 'train', prefix_str + '.' 
                                + str(i) + '.jpg' )
                  for i in range(range_start, range_end)]
    dest_dir = os.path.join(work_dir, 'data', target_dir, prefix_str)
    os.makedirs(dest_dir)
    for image_path in image_paths:
        shutil.copy(image_path, dest_dir)

In [0]:
##alternative with different folder setup
copy_files2('dog', 0, 1000, 'train')
copy_files2('cat', 0, 1000, 'train')
copy_files2('dog', 1000, 1400, 'test')
copy_files2('cat', 1000, 1400, 'test')

In [9]:
print(work_dir)




In [43]:
image_height, image_width = 150,150
train_dir = os.path.join(work_dir, "data", 'train')
test_dir = os.path.join(work_dir, "data", 'test')
no_classes = 2
no_validation = 800
epochs = 20
batch_size = 16
no_train = 2000
no_test = 800
input_shape = (image_height, image_width, 3)
epoch_steps = no_train // batch_size
test_steps = no_test // batch_size
print(epoch_steps)
print(test_steps)

125
50


In [0]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K

In [30]:
if K.image_data_format() == 'channels_first':
    input_shape = (3, image_width, image_height)
else:
    input_shape = (image_width, image_height, 3)

model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.summary()

simple_cnn_model = model


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 148, 148, 32)      896       
_________________________________________________________________
activation_6 (Activation)    (None, 148, 148, 32)      0         
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 72, 72, 32)        9248      
_________________________________________________________________
activation_7 (Activation)    (None, 72, 72, 32)        0         
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 36, 36, 32)        0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 34, 34, 64)        18496     
__________

In [0]:
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [0]:
generator_train = keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)
generator_test = keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)


In [38]:
train_images = generator_train.flow_from_directory(
    train_dir,
    batch_size=batch_size,
    target_size=(image_width, image_height),
    class_mode='binary'
)

test_images = generator_test.flow_from_directory(
    test_dir,
    batch_size=batch_size,
    target_size=(image_width, image_height),
    class_mode='binary'
)

Found 2000 images belonging to 2 classes.
Found 800 images belonging to 2 classes.


In [14]:
print(train_images)

<keras_preprocessing.image.DirectoryIterator object at 0x7f6fab377ef0>


In [39]:
simple_cnn_model.fit_generator(
    train_images,
    steps_per_epoch=epoch_steps,
    epochs=epochs,
    validation_data=test_images,
    validation_steps=test_steps)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f6fa83f0128>

### Augmenting the dataset

In [0]:
generator_train = keras.preprocessing.image.ImageDataGenerator(
    rotation_range= 40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    rescale= 1./255,
    horizontal_flip=True,
    zoom_range= 0.3,
    shear_range= 0.3,
    fill_mode= 'nearest'
    
)

In [53]:
train_images = generator_train.flow_from_directory(
    train_dir,
    batch_size=batch_size,
    target_size=(image_width, image_height),
    class_mode='binary'
)

test_images = generator_test.flow_from_directory(
    test_dir,
    batch_size=batch_size,
    target_size=(image_width, image_height),
    class_mode='binary'
)

Found 2000 images belonging to 2 classes.
Found 800 images belonging to 2 classes.


In [54]:
simple_cnn_model.fit_generator(
    train_images,
    steps_per_epoch=epoch_steps,
    epochs=epochs,
    validation_data=test_images,
    validation_steps=test_steps)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f6fa948f860>

### Transfer learning

In [0]:
generator = keras.preprocessing.image.ImageDataGenerator(rescale = 1. / 255)

In [0]:
model = keras.applications.VGG16(include_top= False)

In [85]:
train_images = generator.flow_from_directory(
    train_dir,
    batch_size= batch_size,
    target_size= (image_width, image_height),
    class_mode = None,
    shuffle = False
)

Found 2000 images belonging to 2 classes.


In [0]:
train_bottleneck_features = model.predict_generator(
    train_images,
    epoch_steps
)

In [87]:
test_images = generator.flow_from_directory(
    test_dir,
    batch_size= batch_size,
    target_size = (image_width, image_height),
    class_mode = None,
    shuffle= False
)

Found 800 images belonging to 2 classes.


In [0]:
test_bottleneck_features = model.predict_generator(test_images, 
                                                   test_steps
                                                  )

In [0]:
import numpy as np

In [90]:
train_labels = np.array([0] * int(no_train / 2) + [1] * int(no_train / 2))
test_labels = np.array([0] * int(no_test / 2) + [1] * int(no_test / 2))
print(train_labels)
print(test_labels)

[0 0 0 ... 1 1 1]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 

In [0]:
from keras.utils.np_utils import to_categorical

categorical_labels_train = to_categorical(train_labels)
categorical_labels_test = to_categorical(test_labels)

In [96]:
print(categorical_labels_test)

[[1. 0.]
 [1. 0.]
 [1. 0.]
 ...
 [0. 1.]
 [0. 1.]
 [0. 1.]]


In [0]:
model = keras.models.Sequential()
model.add(Flatten(input_shape= train_bottleneck_features.shape[1:]))
model.add( Dense(1024, activation= 'relu'))
model.add( Dropout(0.3))
model.add( Dense(1, activation= 'softmax'))
model.compile( loss= keras.losses.categorical_crossentropy,
              optimizer = keras.optimizers.Adam(),
              metrics= ['accuracy']
)

In [103]:
print(len(categorical_labels_test))

800


In [0]:
#print(test_bottleneck_features)
categorical_labels_test =np.reshape(categorical_labels_test,1600)

In [109]:
print(categorical_labels_test)

[1. 0. 1. ... 1. 0. 1.]


In [108]:
model.fit( train_bottleneck_features,
          categorical_labels_train,
          batch_size= batch_size,
          epochs= epochs,
          validation_data = (test_bottleneck_features, categorical_labels_test)
)

ValueError: ignored