# Dogs vs. Cats / Deep Residual Network (ResNet)


In [None]:
# Extract both competition archives into the current working directory
# (-q: quiet, -d .: destination). Later cells read the images from ./train and ./test.
!unzip -q   /kaggle/input/dogs-vs-cats-redux-kernels-edition/train.zip -d .
!unzip -q  /kaggle/input/dogs-vs-cats-redux-kernels-edition/test.zip -d .

In [None]:
import tensorflow as tf
from tensorflow import keras
from keras.preprocessing.image import load_img,img_to_array
import numpy as np
import os
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import time
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import shutil
from keras.preprocessing.image import ImageDataGenerator

# Print the full path of every file found under /kaggle/input.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Report the GPU devices TensorFlow can see and the installed TensorFlow version.
print("GPU is {}".format(tf.config.list_physical_devices('GPU')))
print("tensorflow version {}".format(tf.__version__))

# Contents of the current working directory.
print(os.listdir("./"))

# GPU status as reported by the nvidia-smi command line tool.
!nvidia-smi

# Reset Keras' global state before any model is built.
keras.backend.clear_session()

# Data Pre-processing and Visualization

In [None]:

def show_cats_and_dogs(show="",width=150,height=150, images_path ='./train/'):
    """Plot a gallery of up to 100 images found directly inside ``images_path``.

    Images are laid out 25 per row, separated by 5-pixel white stripes. An
    incomplete last row is padded with white so that every selected image is
    displayed.

    Args:
        show: Substring a file name must contain to be included (for example
            "cat" or "dog"); the empty string disables the filter.
        width: Width in pixels each image is resized to.
        height: Height in pixels each image is resized to.
        images_path: Folder whose files are displayed.

    Raises:
        ValueError: If no file in ``images_path`` matches ``show``.
    """
    cols = 25    # images per gallery row
    limit = 100  # maximum number of images displayed
    gap = 5      # thickness in pixels of the white separators

    tiles = []
    for name in os.listdir(images_path):
        if len(tiles) == limit:
            break
        if show != "" and show not in name:
            continue
        file_path = os.path.join(images_path, name)
        if not os.path.isfile(file_path):
            # Skip sub-folders (e.g. class folders created inside ./train/).
            continue
        # load_img expects target_size as (height, width).
        tiles.append(img_to_array(load_img(file_path, target_size=(height, width))))

    if not tiles:
        raise ValueError("no images matching {!r} found in {}".format(show, images_path))

    channels = tiles[0].shape[-1]

    def white(rows, columns):
        # White float32 block used for separators and row padding.
        return np.full((rows, columns, channels), 255, dtype=np.float32)

    row_width = cols * (width + gap)
    gallery_rows = []
    for start in range(0, len(tiles), cols):
        parts = []
        for tile in tiles[start:start + cols]:
            parts.extend((tile, white(height, gap)))
        # Pad a short (last) row so that all rows share the same width.
        filler = row_width - sum(part.shape[1] for part in parts)
        if filler > 0:
            parts.append(white(height, filler))
        gallery_rows.extend((np.hstack(parts), white(gap, row_width)))
    gallery = np.vstack(gallery_rows)

    plt.figure(figsize=(12,12))
    plt.xticks([])
    plt.yticks([])
    title={"":"cães & gatos",
           "cat": "gatos",
           "dog": "cães"}
    # The title reports the number of images actually drawn; an unknown filter
    # value is shown verbatim instead of raising KeyError.
    plt.title("{} imagens de {} [ path {} ] .".format(len(tiles), title.get(show, show), images_path))
    plt.imshow(gallery.astype(np.uint8))

In [None]:
# Raw dataset: number of entries in the extracted train and test folders.
train_entry_count = len(os.listdir("./train"))
print("O dataset possui {} imagens de gatos e cães para classificação.".format(train_entry_count))
test_entry_count = len(os.listdir("./test"))
print("O dataset de teste possui {}.".format(test_entry_count))


### Gatos

In [None]:
# Gallery restricted to files whose name contains 'cat'.
show_cats_and_dogs(show='cat')

### Cães

In [None]:
# Gallery restricted to files whose name contains 'dog'.
show_cats_and_dogs(show='dog')

### Ambos

In [None]:
# Gallery without a name filter, so files of either class can appear.
show_cats_and_dogs(show='')

### Test

In [None]:
# Gallery of the images in ./test/ (the default show="" applies no name filter).
show_cats_and_dogs(images_path='./test/')

### Pre-processing

In [None]:
# Image size used by the generators and the model input (299, 299 is the
# alternative noted by the original author).
image_width, image_height = 150, 150
train_path = "./train/"
labels = ['cat', 'dog']

# Make sure one sub-folder per class exists under ./train/.
for label in labels:
    class_dir = './train/' + label
    if os.path.exists(class_dir):
        print('{} já existe.'.format(class_dir))
    else:
        print('{} criado.'.format(class_dir))
        os.mkdir(class_dir)

# Copy every file whose name contains '.jpg' into the sub-folder that matches
# the class found in the part of its name before the first dot.
for image_name in os.listdir(train_path):
    if '.jpg' not in image_name:
        continue
    prefix = image_name.split(".")[0]
    if 'dog' in prefix:
        destination = './train/dog/' + image_name
    elif 'cat' in prefix:
        destination = './train/cat/' + image_name
    else:
        continue
    shutil.copyfile(train_path + image_name, destination)


In [None]:
# Count the files found (recursively) under each class folder.
dog_total = sum(len(file_names) for _root, _dirs, file_names in os.walk('./train/dog/'))
print("Total de cães:\t{}".format(dog_total))
cat_total = sum(len(file_names) for _root, _dirs, file_names in os.walk('./train/cat/'))
print("Total de gatos:\t{}".format(cat_total))

In [None]:
keras.backend.clear_session()
batch_size=32
validation_split=0.2

# Training pipeline: scales pixel values by 1/255 and applies light random
# augmentation (horizontal flip, 10% width/height shifts).
train_data_generator = ImageDataGenerator(rescale=1./255, 
                                          horizontal_flip=True, 
                                          featurewise_center=False,
                                          samplewise_center=False,
                                          featurewise_std_normalization=False,
                                          samplewise_std_normalization=False,
                                          zca_whitening=False,
                                          rotation_range=0,
                                          width_shift_range=0.1,
                                          height_shift_range=0.1,
                                          vertical_flip=False,
                                          validation_split=validation_split)

# Validation pipeline: same rescaling and the same split fraction, but no random
# augmentation, so validation metrics are measured on unmodified images. The
# split depends only on the directory listing and validation_split, so the two
# generators still select disjoint subsets.
val_data_generator = ImageDataGenerator(rescale=1./255,
                                        validation_split=validation_split)

train_datagenerator = train_data_generator.flow_from_directory(train_path,
                                                    target_size=(image_width,image_height ),
                                                    class_mode="categorical",
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    subset='training')

# Shuffling is unnecessary for evaluation; a fixed order keeps runs comparable.
val_datagenerator = val_data_generator.flow_from_directory(train_path,
                                                     target_size=(image_width,image_height),
                                                     class_mode="categorical",
                                                     shuffle=False,
                                                     batch_size=batch_size,
                                                     subset='validation')

# Sample counts taken from the generators instead of hard-coded values, so they
# always agree with validation_split and the files actually on disk.
dataset_size = train_datagenerator.samples
val_size = val_datagenerator.samples


In [None]:
# Display the class names seen by the training and validation generators
# (both read the same train_path folder).
train_datagenerator.class_indices.keys(),val_datagenerator.class_indices.keys()

In [None]:
from tensorflow.keras.layers import Dense, Conv2D,BatchNormalization, Activation,AveragePooling2D, Input,Flatten, add
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler,ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model

In [None]:
# Reset Keras' global state before the model is built.
keras.backend.clear_session()
# Shape of one network input; the trailing 3 is presumably the RGB channel count.
input_shape =  (image_width,image_height,3)
# Number of units of the final softmax layer.
num_classes = 2
# learning_rate_scheduler: lowers the learning rate as training progresses
def learning_rate_scheduler(epoch=0):
    """Return the learning rate for ``epoch`` and print it.

    The base rate is 1e-3. It is multiplied by 1e-1 after epoch 40, by 1e-2
    after epoch 60, by 1e-3 after epoch 80 and by 0.5e-3 after epoch 90.
    """
    # (epoch threshold, multiplier), checked from the latest stage backwards;
    # only the first matching stage is applied.
    decay_steps = ((90, 0.5e-3), (80, 1e-3), (60, 1e-2), (40, 1e-1))
    lr = 1e-3
    for threshold, factor in decay_steps:
        if epoch > threshold:
            lr *= factor
            break
    print('Learning rate: ', lr)
    return lr

def resnet_layer(inputs,
                 num_filters=16,
                 kernel_size=3,
                 strides=1,
                 activation='relu',
                 batch_normalization=True,
                 conv_first=True ):
    """Build one 2D convolution / batch normalization / activation stack.

    Args:
        inputs: Tensor the stack is applied to.
        num_filters: Number of filters of the convolution.
        kernel_size: Kernel size of the convolution.
        strides: Strides of the convolution.
        activation: Activation name, or None to skip the activation.
        batch_normalization: Whether to include batch normalization.
        conv_first: True for conv -> BN -> activation,
            False for BN -> activation -> conv.

    Returns:
        The output tensor of the stack.
    """
    convolution = Conv2D(num_filters,
                         kernel_size=kernel_size,
                         strides=strides,
                         padding='same',
                         kernel_initializer='he_normal',
                         kernel_regularizer=l2(1e-4))

    def normalize_and_activate(tensor):
        # Optional batch normalization followed by the optional activation.
        if batch_normalization:
            tensor = BatchNormalization()(tensor)
        if activation is not None:
            tensor = Activation(activation)(tensor)
        return tensor

    if conv_first:
        return normalize_and_activate(convolution(inputs))
    return convolution(normalize_and_activate(inputs))

# Depth setting used to derive the number of residual blocks per stage.
# NOTE(review): int((27 - 2) / 9) truncates to 2, so depth=27 builds the same
# network as depth=20 -- confirm which depth was intended.
depth=27
num_filters_in = 16
num_res_blocks = int((depth - 2) / 9)

inputs = Input(shape=input_shape)
# Stem: convolution -> batch normalization -> ReLU on the raw input.
x = resnet_layer(inputs=inputs,num_filters=num_filters_in, conv_first=True)
# Residual units: 3 stages of num_res_blocks bottleneck blocks each.
for stage in range(3):
  for res_block in range(num_res_blocks):
    activation = 'relu'
    batch_normalization = True
    strides = 1
    if stage == 0:
        num_filters_out = num_filters_in * 4
        # first layer and first stage: skip BN/activation on the block input
        if res_block == 0:  
            activation = None
            batch_normalization = False
    else:
        num_filters_out = num_filters_in * 2
        # first layer but not first stage
        if res_block == 0:
            # downsample
            strides = 2 
    # Bottleneck: 1x1 conv -> 3x3 conv -> 1x1 conv, each preceded by BN/activation.
    
    y = resnet_layer(inputs=x,
                      num_filters=num_filters_in,
                      kernel_size=1,
                      strides=strides,
                      activation=activation,
                      batch_normalization=batch_normalization,
                      conv_first=False)
    
    y = resnet_layer(inputs=y,
                      num_filters=num_filters_in,
                      conv_first=False)
    
    y = resnet_layer(inputs=y,
                      num_filters=num_filters_out,
                      kernel_size=1,
                      conv_first=False)
    
    if res_block == 0:
        # linear projection residual shortcut connection
        # to match changed dims
        x = resnet_layer(inputs=x,
                          num_filters=num_filters_out,
                          kernel_size=1,
                          strides=strides,
                          activation=None,
                          batch_normalization=False)
    x = add([x, y])
  # The next stage takes this stage's output width as its input width.
  num_filters_in = num_filters_out
# v2 has BN-ReLU before Pooling
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = AveragePooling2D(pool_size=8)(x)
y = Flatten()(x)
outputs = Dense(num_classes,
                activation='softmax',
                kernel_initializer='he_normal')(y)

# instantiate model.
model = Model(inputs=inputs, outputs=outputs)

# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases reject. The initial value is the schedule's epoch-0 rate.
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=learning_rate_scheduler()), metrics=['acc'])

In [None]:
import os
import math

# Checkpoints are written to <working directory>/saved_models; the epoch number
# is formatted into the file name.
save_dir = os.path.join(os.getcwd(), 'saved_models')
os.makedirs(save_dir, exist_ok=True)
model_name = 'cifar10_model.{epoch:03d}.h5'
filepath = os.path.join(save_dir, model_name)

# Save the model only when the monitored validation accuracy improves.
checkpoint = ModelCheckpoint(
    filepath=filepath,
    monitor='val_acc',
    save_best_only=True,
    verbose=1,
)

# Reduce the learning rate by a factor of sqrt(0.1) after 5 epochs without
# improvement, never going below 0.5e-6.
lr_reducer = ReduceLROnPlateau(
    factor=np.sqrt(0.1),
    patience=5,
    cooldown=0,
    min_lr=0.5e-6,
)

# Epoch-based schedule defined by learning_rate_scheduler.
lr_scheduler = LearningRateScheduler(learning_rate_scheduler)

callbacks = [checkpoint, lr_reducer, lr_scheduler]


In [None]:
epochs=100
# One epoch is one full pass over the training generator: len() of the
# generator is its number of batches, so this always matches the data on disk
# instead of relying on a hard-coded sample count.
steps_per_epoch = len(train_datagenerator)
history = model.fit(train_datagenerator,
                    verbose=1,
                    epochs=epochs,
                    validation_data=val_datagenerator,
                    steps_per_epoch=steps_per_epoch,
                    callbacks=callbacks)

In [None]:
# Print the metrics recorded for the last training epoch.
recorded = history.history
report = (
    ("Train Accuracy:\t\t{:.3f}", 'acc'),
    ("Val   Accuracy:\t\t{:.3f}", 'val_acc'),
    None,  # blank separator line
    ("Train Loss:\t\t{:.3f}", 'loss'),
    ("Val   Loss:\t\t{:.3f}", 'val_loss'),
)
for entry in report:
    if entry is None:
        print('')
        continue
    template, metric = entry
    print(template.format(recorded[metric][-1]))

In [None]:
# x axis: 1-based epoch numbers. A dedicated name is used so the integer
# `epochs` consumed by model.fit is not overwritten when this cell runs.
epoch_axis = list(range(1, len(history.history['acc']) + 1))
plt.plot(epoch_axis, history.history['acc'], epoch_axis, history.history['val_acc'])
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy')
plt.legend(('Training','Validation'))
plt.show()

### Loss

In [None]:
# x axis: 1-based epoch numbers. A dedicated name is used so the integer
# `epochs` consumed by model.fit is not overwritten when this cell runs.
epoch_axis = list(range(1, len(history.history['loss']) + 1))
plt.plot(epoch_axis, history.history['loss'], epoch_axis, history.history['val_loss'])
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss')
plt.legend(('Training','Validation'))
plt.show()

In [None]:
test_path ="./test/"
if not os.path.exists("./test"):
  os.mkdir("./test")
  print('./test criado.')

# flow_from_directory reads images from sub-folders of the given directory, so
# the test images are copied into a single sub-folder, ./test/data.
dir_path = "./test/data"
if not os.path.exists(dir_path):
  print('{} criado.'.format(dir_path))
  os.mkdir(dir_path)
else:
  print('{} já existe.'.format(dir_path))
for file in os.listdir(test_path):
    if '.jpg' in file:
        shutil.copyfile(test_path+file,dir_path+'/'+file)

# These are the test images, not cats only: the message now says so.
print("Total de imagens de teste:\t{}".format(sum([len(files) for r, d, files in os.walk(dir_path+'/')])))

test_path = dir_path+'/'
test_data_generator = ImageDataGenerator(rescale=1./255)

# class_mode=None yields images only; shuffle=False keeps the prediction order
# aligned with test_generator.filenames.
test_generator = test_data_generator.flow_from_directory(directory ='./test',
                                                         target_size=(image_width,image_height),
                                                     batch_size=batch_size,
                                                     class_mode=None,
                                                     shuffle=False)

In [None]:
# Model output for every test image (one row per image, one column per class).
predict = model.predict(test_generator, verbose=1)
# Index of the highest-scoring class per image, stored as float32.
predicted_class = predict.argmax(axis=-1)
predict_norm = predicted_class.astype(np.float32)

In [None]:
import random
# Show five randomly chosen test images together with their predicted class.
for _ in  range(0,5):
    # randrange excludes the upper bound, so the index is always valid
    # (randint(0, n) may return n, which is one past the last file name).
    index =  random.randrange(len(test_generator.filenames))
    path= test_generator.filenames[index]
    plt.figure(figsize=(4, 4))
    img=load_img('./test/'+path, target_size=(image_width,image_height))
    plt.imshow(img)
    # Score of class index 1 and the predicted class index for this image.
    print(predict[index,1],predict_norm[index])
    if (predict_norm[index]) >= 1.:
        label=' Dog 🐶 '
    else:
        label=' Cat 🐱 '
    plt.colorbar()
    plt.grid(True)
    plt.axis("off")
    plt.title("Class: {}".format(label))
    plt.show()

In [None]:
# Build the submission table: numeric image id plus the predicted class (0 or 1).
# NOTE(review): labels are hard 0/1 classes; if the competition is scored on
# predicted probabilities (e.g. log loss), predict[:, 1] may be the better
# value to submit -- confirm against the competition rules.
submission = pd.DataFrame({
    'id':pd.Series(test_generator.filenames),
    'label':pd.Series(predict_norm)
    })
# Raw string avoids the invalid '\d' escape sequence; expand=False makes
# str.extract return a Series (one capture group) instead of a DataFrame.
submission['id'] = submission.id.str.extract(r'(\d+)', expand=False)
submission['id']=pd.to_numeric(submission['id']).astype('int')
submission['label']=pd.to_numeric(submission['label']).astype('int')
submission.to_csv("submission_fork.csv",index=False)

In [None]:
# Preview the first ten rows of the submission table.
submission.head(10)

In [None]:
# Remove the extracted data and the saved checkpoints from the working
# directory. Folders that are already gone are skipped so the cell can be
# re-run without raising FileNotFoundError.
for folder in ("./test", "./train", "./saved_models"):
    if os.path.isdir(folder):
        shutil.rmtree(folder)