# Today's Date - 30 June 2023
# Topic - Training with and without Data Augmentation on large batchs of images

In [12]:
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import load_img, img_to_array

import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

In [2]:
batch_size = 16

train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)
test_datagen = ImageDataGenerator(
    rescale=1./255
)

## 1000 images of dogs and cats each for data augmentation
## 500 images of dogs and cats each for validation

In [3]:
train_directory = 'A:/CODING/Python/Projects/cat-dog-classifier/data/train'
valid_directory = 'A:/CODING/Python/Projects/cat-dog-classifier/data/valid'

train_augmented_images = train_datagen.flow_from_directory(
    directory=train_directory,
    target_size=(150,150),
    batch_size = batch_size,
    class_mode='binary'
)
validation_images = test_datagen.flow_from_directory(
    directory=valid_directory,
    target_size=(150,150),
    batch_size = batch_size,
    class_mode='binary'
)

Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.


In [16]:
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), input_shape=(150, 150, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

In [17]:
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_8 (Conv2D)           (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 74, 74, 32)       0         
 2D)                                                             
                                                                 
 conv2d_9 (Conv2D)           (None, 72, 72, 32)        9248      
                                                                 
 activation_4 (Activation)   (None, 72, 72, 32)        0         
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 36, 36, 32)       0         
 2D)                                                             
                                                                 
 conv2d_10 (Conv2D)          (None, 34, 34, 64)       

In [18]:
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

In [20]:
model.fit_generator(
        train_augmented_images,
        steps_per_epoch=2000 // batch_size,
        epochs=5,
        validation_data=validation_images,
        validation_steps=800 // batch_size,
        verbose=1
)

  model.fit_generator(


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1fbf3233970>

In [21]:
model.save_weights('first_try.h5')

# We achevied 69% percent accuracy in just 5 epochs
### If trained for more epochs it will reach higher


# Lets try to train without data augmentation, and compare the results
### 1000 images of cats and dogs each for train
### 500 images of cats and dogs each for validation

In [22]:
train_directory = 'A:/CODING/Python/Projects/cat-dog-classifier/data/train'
valid_directory = 'A:/CODING/Python/Projects/cat-dog-classifier/data/valid'

train = keras.utils.image_dataset_from_directory(
    directory=train_directory,
    labels='inferred',
    label_mode='int',
    batch_size=16,
    image_size=(150, 150)  # all images will be reshaped into this size
)

validation = keras.utils.image_dataset_from_directory(
    directory=valid_directory,
    labels='inferred',
    label_mode='int',
    batch_size=16,
    image_size=(150, 150)  # all images will be reshaped into this size
)

Found 2000 files belonging to 2 classes.
Found 1000 files belonging to 2 classes.


In [23]:
print(type(train))

<class 'tensorflow.python.data.ops.dataset_ops.BatchDataset'>


In [24]:
for images, labels in train:
    print('First image in the batch:')
    print(images[0])  # Print the first image in the batch

    print('First label in the batch:')
    print(labels[0])  # Print the first label in the batch

    break  # Print only the first batch for demonstration purposes


First image in the batch:
tf.Tensor(
[[[180.45233  173.45233  129.45233 ]
  [183.02733  176.02733  132.02733 ]
  [184.46167  177.46167  133.46167 ]
  ...
  [182.64833  185.64833  140.64833 ]
  [187.87036  190.87036  145.87036 ]
  [188.       191.       146.      ]]

 [[185.107    178.107    134.107   ]
  [184.98799  177.98799  133.98799 ]
  [185.56     178.56     134.56    ]
  ...
  [183.       186.       141.      ]
  [186.99902  189.99902  144.99902 ]
  [188.       191.       146.      ]]

 [[185.64     178.64     134.64    ]
  [182.92168  175.92168  131.92168 ]
  [182.34166  175.34166  131.34166 ]
  ...
  [183.74167  186.74167  141.74167 ]
  [185.90002  188.90002  143.90002 ]
  [187.66168  190.66168  145.66168 ]]

 ...

 [[160.56502  155.56502  123.56501 ]
  [156.33165  151.33165  119.33166 ]
  [156.70831  151.70831  119.708305]
  ...
  [149.       152.       123.      ]
  [150.       153.       124.      ]
  [151.       154.       125.      ]]

 [[160.63599  155.63599  123.636   ]


In [25]:
# All the values in numpy array has range of 0 to 255 (each pixel has 0 to 255 brightness range)

# So, we need to normalize
def process (image,label):
  image = tf.cast(image/255, tf.float32)
  return image,label

train = train.map(process)
validation = validation.map(process)

In [26]:
for images, labels in train:
    print('First image in the batch:')
    print(images[0])  # Print the first image in the batch

    print('First label in the batch:')
    print(labels[0])  # Print the first label in the batch

    break  # Print only the first batch for demonstration purposes


First image in the batch:
tf.Tensor(
[[[0.79428035 0.90408427 0.9511431 ]
  [0.8012638  0.9168716  0.9610285 ]
  [0.8111111  0.92784315 0.9737255 ]
  ...
  [0.4762342  0.5302211  0.38136432]
  [0.3097035  0.35966432 0.24813476]
  [0.4248857  0.4719445  0.38373542]]

 [[0.7709587  0.88076264 0.9238999 ]
  [0.79377806 0.90700626 0.9501435 ]
  [0.8058993  0.92033076 0.9638431 ]
  ...
  [0.23033671 0.2821759  0.15080716]
  [0.35338458 0.40215555 0.3074888 ]
  [0.46773985 0.51358736 0.4376484 ]]

 [[0.8089904  0.91487277 0.9501669 ]
  [0.817651   0.9235333  0.95882744]
  [0.7976972  0.9044946  0.94161874]
  ...
  [0.38177824 0.4279068  0.32847533]
  [0.4301672  0.47614107 0.40206647]
  [0.48884404 0.53191596 0.4753077 ]]

 ...

 [[0.61637354 0.58924985 0.620426  ]
  [0.5787589  0.5573423  0.58566487]
  [0.39359498 0.38376072 0.41019627]
  ...
  [0.575621   0.5950979  0.6188237 ]
  [0.565426   0.5849029  0.6086287 ]
  [0.573845   0.5933219  0.6111392 ]]

 [[0.60186577 0.56555206 0.59987885]


In [27]:
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), input_shape=(150, 150, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

In [28]:
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

In [29]:
model.fit(train, epochs=5, validation_data=validation, verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1fbf32ea620>

# On training on higher number of epochs, augmented data will give higher accuracy