# CNNs: Overfitting + Data Augmentation


### Get Ready

Dataset dogs vs. cats: datasets/dogs-vs-cats-2k.zip

Download, unpack and inspect the structure of our data.

In [0]:
# ! wget .../datasets/dogs-vs-cats-2k.zip

### Data preprocessing

Read the images as RGB tensors and rescale their pixel values to the [0, 1] interval. Read the images in batches of `batch_size=20` and resize them to 150x150.

In [0]:
%tensorflow_version 2.x

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Dataset locations. The training path matches the sample image used in the
# visualization cell below ('dogs-vs-cats-2k/train/dog/dog.70.jpg').
imgs_train_dir = 'dogs-vs-cats-2k/train'
# NOTE(review): the validation folder name is assumed — confirm it against the
# structure of the unpacked archive.
imgs_val_dir = 'dogs-vs-cats-2k/validation'

# Rescale images: multiply every pixel by 1/255 so values land in [0, 1].
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Stream images from disk in batches of 20, resized to 150x150.
# class_mode='binary' yields one 0/1 label per image, which matches the
# 'binary_crossentropy' loss used when the model is compiled.
train_generator = train_datagen.flow_from_directory(
    imgs_train_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary',
)
validation_generator = test_datagen.flow_from_directory(
    imgs_val_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary',
)

### Implement NN

In [0]:
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers

# Small convnet for binary classification of 150x150 RGB images:
# four conv/max-pool stages followed by a fully connected classifier.
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))  # conv 3x3 32 filters + relu
model.add(layers.MaxPooling2D((2, 2)))  # max pooling 2x2
model.add(layers.Conv2D(64, (3, 3), activation='relu'))  # conv 3x3 64 filters + relu
model.add(layers.MaxPooling2D((2, 2)))  # max pooling 2x2
model.add(layers.Conv2D(128, (3, 3), activation='relu'))  # conv 3x3 128 filters + relu
model.add(layers.MaxPooling2D((2, 2)))  # max pooling 2x2
model.add(layers.Conv2D(128, (3, 3), activation='relu'))  # conv 3x3 128 filters + relu
model.add(layers.MaxPooling2D((2, 2)))  # max pooling 2x2
# Dense layers need a flat vector per sample, so collapse the feature maps first.
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))  # FC 512 + relu
model.add(layers.Dense(1, activation='sigmoid'))  # FC + sigmoid
# RMSprop 1e-4 + accuracy metric. The metric is registered as 'acc', so the
# training history exposes it under the keys 'acc' and 'val_acc'.
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.RMSprop(learning_rate=1e-4),
    metrics=['acc'],
)

### What is the batch size?

In [0]:
# Inspect only the first batch produced by the training generator; the leading
# dimension of each printed shape is the batch size.
for data_batch, labels_batch in train_generator:
    images_shape, labels_shape = data_batch.shape, labels_batch.shape
    print('data batch shape:', images_shape)
    print('labels batch shape:', labels_shape)
    break  # one batch is enough

### Train

In [0]:
# Train for 30 epochs, validating after each one.
# `Model.fit` accepts generators directly in TF 2.x (`fit_generator` is deprecated).
# Step counts are taken from the generators' lengths (batches per full pass)
# instead of being hard-coded, so an epoch never asks for more batches than the
# dataset provides, whatever the batch size.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=30,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
)

### Save a model

In [0]:
# Save the trained model to 'dogs_vs_cats_small_v1.h5' (a path relative to the
# current working directory) so it can be reloaded later without retraining.
model.save('dogs_vs_cats_small_v1.h5')

### Plot accuracy and loss

In [0]:
import matplotlib.pyplot as plt

# get from variable "history"
# Per-epoch metrics recorded during training. The keys follow the metric name
# passed to `model.compile` (metrics=['acc']); validation values get a 'val_' prefix.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

# 1-based epoch numbers for the x axis.
epochs = range(1, len(acc) + 1)

# Figure 1: accuracy (dots = training, line = validation).
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.legend()
plt.figure()  # start a new figure so the loss curves are drawn separately

# Figure 2: loss (dots = training, line = validation).
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.legend()
plt.show()

### Data augmentation
Use random transformations:

In [0]:
# Random-transformation settings used to demonstrate augmentation.
# No `rescale` is set here, so generated images keep their original pixel range.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(**augmentation_options)

### Visualize:

In [0]:
from tensorflow.keras.preprocessing import image
# Load one training image at 150x150 and convert it to an array.
f = 'dogs-vs-cats-2k/train/dog/dog.70.jpg'
img = image.load_img(f, target_size=(150, 150))
x = image.img_to_array(img)
# Prepend a batch axis of size 1: (h, w, c) -> (1, h, w, c).
x = x.reshape((1, *x.shape))

# Draw four randomly transformed versions of the image, one figure each.
# The flow never stops on its own, so stop explicitly after four batches.
augmented_batches = iter(datagen.flow(x, batch_size=1))
i = 0
while i < 4:
    batch = next(augmented_batches)
    plt.figure(i)
    imgplot = plt.imshow(image.array_to_img(batch[0]))
    i += 1
plt.show()

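### Add dropout 0.5 before FC

Rebuild the model with a dropout layer (rate 0.5) inserted before the fully connected (FC) layers, and recompile it before training.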

### Train

In [0]:
# Training data: rescale to [0, 1] and apply random augmentation on the fly.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
# Validation data is only rescaled; it is not augmented.
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    imgs_train_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)
validation_generator = test_datagen.flow_from_directory(
    imgs_val_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)

# NOTE(review): `model` is reused as-is from the earlier cells. To follow the
# "Add dropout 0.5 before FC" step, rebuild and recompile the model with the
# dropout layer before running this cell; otherwise training continues from the
# weights of the previous run.
#
# `Model.fit` accepts generators directly in TF 2.x (`fit_generator` is deprecated).
# Step counts come from the generators' lengths (batches per full pass) so an
# epoch never requests more batches than the dataset provides at batch_size=32.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=10,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
)

### Save a model

### Plot accuracy and loss

In [0]:
# Per-epoch metrics from the most recent training run. The keys follow the
# metric name passed to `model.compile` (metrics=['acc']); validation values
# get a 'val_' prefix.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

# 1-based epoch numbers for the x axis.
epochs = range(1, len(acc) + 1)

# Figure 1: accuracy (dots = training, line = validation).
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.legend()
plt.figure()  # start a new figure so the loss curves are drawn separately

# Figure 2: loss (dots = training, line = validation).
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.legend()
plt.show()