<a href="https://colab.research.google.com/github/bhavyaKumawat/tensorflow-datasets/blob/main/Cats_vs_dogs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import Libraries

In [None]:
import zipfile
import os
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import drive
from matplotlib.image import imread
from tensorflow.keras.optimizers import Adam, RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import VGG16

## Download Kaggle Datasets

In [None]:
# Mount Google Drive under /content/drive; the Kaggle API token (kaggle.json)
# is copied out of it a few cells below.
drive.mount('/content/drive')

In [None]:
! mkdir /root/.kaggle

In [None]:
! cp 'drive/MyDrive/Colab Notebooks/kaggle.json' '/root/.kaggle/kaggle.json'

In [None]:
# Download the competition files into the current working directory using the
# Kaggle CLI (it authenticates with the kaggle.json placed in /root/.kaggle).
! kaggle competitions download -c dogs-vs-cats-redux-kernels-edition

In [None]:
# List the working directory; the next cell expects test.zip and train.zip here.
! ls

In [None]:
# Unpack both competition archives into the current working directory.
files = ['test.zip', 'train.zip']
destination = os.getcwd()

for archive_name in files:
  with zipfile.ZipFile(archive_name, mode='r') as archive:
    archive.extractall(destination)

In [None]:
# Report how many files were extracted into each dataset folder.
for caption, folder in (("Training examples : ", 'train'),
                        ("Test examples : ", 'test')):
  print(caption, len(os.listdir(folder)))

In [None]:
def file_to_ndarray(dir , index):
  """Load the `index`-th file of directory `dir` as an image array.

  Entries are taken in sorted file-name order. `os.listdir` returns names in
  arbitrary order, so without sorting the same index could refer to a
  different file from one run (or machine) to the next.

  Args:
    dir: Path of the directory holding the image files. (The name shadows the
      builtin `dir`; it is kept so existing keyword callers keep working.)
    index: Position of the wanted file in the sorted listing. Negative values
      count from the end, as with any list index.

  Returns:
    Whatever `imread` returns for that file (the image pixel data).

  Raises:
    IndexError: If `index` is outside the directory listing.
  """
  example_name = sorted(os.listdir(dir))[index]
  file_name = os.path.join(dir, example_name)
  return imread(file_name)

All images are of different shapes

In [None]:
# Load two training images and print their array shapes to show that the
# dataset is not uniformly sized.
first_index, second_index = 0, 1

img1 = file_to_ndarray('train', first_index)
print("example1 Shape : ", img1.shape)

img2 = file_to_ndarray('train', second_index)
print("example2 Shape : ", img2.shape)

Let's plot some images from the dataset

In [None]:
# Draw the two sample images side by side in one 10x10-inch figure.
plt.figure(figsize=(10, 10))
for position, sample in enumerate((img1, img2), start=1):
  plt.subplot(1, 2, position)
  plt.imshow(sample)
plt.show()

## Split dataset for Cats and Dogs

In [None]:
! mkdir dir

In [None]:
! mkdir dir/cats dir/dogs

In [None]:
# Copy every training file whose name starts with `cat.` into the cats folder.
! cp train/cat.* dir/cats/

In [None]:
# Sanity check: number of files now in the cats class folder.
cat_count = len(os.listdir('dir/cats/'))
print("Cat examples : ", cat_count)

In [None]:
# Copy every training file whose name starts with `dog.` into the dogs folder.
! cp train/dog.* dir/dogs/

In [None]:
# Sanity check: number of files now in the dogs class folder.
dog_count = len(os.listdir('dir/dogs/'))
print("Dog examples : ", dog_count)

## Data augmentation

In [None]:
# Fraction of the images in each class folder held out for validation.
validation_split = 0.20

# Generator settings: pixel values are multiplied by 1/255, and images are
# randomly shifted (width/height range 0.1) and horizontally flipped, with
# pixels uncovered by a shift filled from the nearest edge.
augmentation_options = dict(
    rescale=1.0/255,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode="nearest",
    validation_split=validation_split,
)

datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# Training batches: augmented 224x224 RGB images read from the class folders
# under `dir`, restricted to the training part of the split, with binary labels.
train_generator = datagen.flow_from_directory(
    'dir',
    target_size=(224, 224),
    color_mode="rgb",
    class_mode='binary',
    batch_size = 32,
    subset='training'
)

# Validation images must not be randomly shifted or flipped, otherwise the
# validation metrics are measured on distorted data. Use a generator that only
# rescales; keeping the same `validation_split` on the same directory makes
# subset='validation' pick the same held-out files as before.
validation_datagen = ImageDataGenerator(
    rescale=1.0/255,
    validation_split=validation_split
)

validation_generator = validation_datagen.flow_from_directory(
    'dir',
    target_size=(224, 224),
    color_mode="rgb",
    class_mode='binary',
    batch_size = 8,
    subset='validation'
)


## Transfer Learning

In [None]:
# VGG16 without its top classifier layers, for 224x224 RGB input.
# `weights` is not passed, so the library default is used.
base_model = VGG16(include_top=False, input_shape=(224, 224, 3))

In [None]:
# Freeze the VGG16 base so that, in the first training phase, only the layers
# added on top of it are updated.
base_model.trainable = False

# Print the layer-by-layer overview of the base model.
base_model.summary()

In [None]:
# The output tensor of the base model's final layer is where the new
# classification head gets attached.
*_, last_layer = base_model.layers
last_output = last_layer.output

In [None]:
# Classification head: flatten the base output, one 512-unit ReLU layer with
# 50% dropout, then a single sigmoid unit for the binary label.
flattened = keras.layers.Flatten()(last_output)
hidden = keras.layers.Dense(512, activation='relu')(flattened)
regularized = keras.layers.Dropout(0.5)(hidden)
x = keras.layers.Dense(1, activation='sigmoid')(regularized)

In [None]:
# Full network: VGG16 input through to the sigmoid output of the new head.
model = keras.Model(inputs=base_model.input, outputs=x)

In [None]:
# First-phase training setup: Adam at learning rate 1e-3, binary cross-entropy
# loss, accuracy reported as the metric.
optimizer = Adam(learning_rate=1e-03)
model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer=optimizer,
)

## Train the model

In [None]:
# Number of files in the raw training folder; kept as a notebook-level name
# because other cells may read it.
training_examples =  len(os.listdir('train'))

# Train the new head for 5 epochs. Step counts come from the generators
# themselves: len(generator) is the whole number of batches in one pass over
# the images it actually found, whereas dividing a file count by the batch
# size yields a float and silently assumes the folder count matches the data.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs = 5,
    validation_data = validation_generator,
    validation_steps=len(validation_generator)
)

## Fine-Tuning

In [None]:
# Unfreeze the VGG16 base so its weights are also updated during fine-tuning.
base_model.trainable = True

In [None]:
# Recompile after changing `base_model.trainable`, this time with a much
# smaller learning rate (1e-5) for the fine-tuning phase.
fine_tune_optimizer = Adam(learning_rate=1e-5)
model.compile(
    optimizer=fine_tune_optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fine-tune the whole network for one epoch. Step counts are taken from the
# generators (len(generator) = whole number of batches per pass) instead of a
# float computed from a folder file count.
# NOTE: this rebinds `history`, so anything that reads `history` afterwards
# sees only this fine-tuning epoch, not the earlier 5-epoch run.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs = 1,
    validation_data = validation_generator,
    validation_steps=len(validation_generator)
)

## Training vs Validation accuracy

In [None]:
# Per-epoch metrics recorded by the most recent `model.fit` call.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# Figure 1: training vs. validation accuracy.
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend(loc=0)

# Figure 2: training vs. validation loss. Previously this figure was opened
# but left empty, and the loss curves were computed without being drawn.
plt.figure()
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend(loc=0)

plt.show()

## Submission

In [None]:
# Parent folder for the test images: the directory-based generator below reads
# images from sub-folders of the directory it is given.
! mkdir test_dir

In [None]:
# Move the extracted `test` folder inside `test_dir`, giving test_dir/test/.
! mv test test_dir

In [None]:
# Test-time generator: applies only the 1/255 pixel rescaling (the same scaling
# used for training) and no random augmentation.
test_datagen = ImageDataGenerator(rescale=1.0/255)

In [None]:
# Unlabelled stream of the test images. Size and colour mode match training;
# shuffling is off so predictions stay in the generator's file order.
test_generator = test_datagen.flow_from_directory(
    directory='test_dir',
    target_size=(224, 224),   # same size as in training
    color_mode="rgb",         # same colour mode as in training
    class_mode=None,          # no labels are generated
    shuffle=False,            # keep a fixed order
)

We have to predict a probability that the image is a dog <br>
(0 = cat, 1 = dog)

In [None]:
# Display the class-name -> integer label mapping used during training, to
# confirm it agrees with the required encoding (expected: cats -> 0, dogs -> 1;
# check the cell output).
train_generator.class_indices

In [None]:
# Score every test image, then drop the trailing axis of the model output so
# there is one value per image.
raw_predictions = model.predict(test_generator)
predictions = raw_predictions.squeeze(axis=-1)

Display some images with their predictions

In [None]:
# Show up to 25 test images in a 5x5 grid, each titled with its prediction.
# Image i is looked up through `test_generator.filenames`, the same ordering
# that produced `predictions`; picking files by `os.listdir` position would
# pair each prediction with an unrelated image, since that order is arbitrary.
plt.figure(figsize=(10,10))
for i in range(min(25, len(predictions))):
    plt.subplot(5,5,i+1)
    plt.xticks([])
    plt.yticks([])
    plt.title(str(predictions[i]))
    # `filenames` entries are relative to the generator's root directory.
    plt.imshow(imread(os.path.join('test_dir', test_generator.filenames[i])))
plt.show()

In [None]:
# Build the submission table (columns: id, label).
# The generator lists files in sorted *string* order ('1.jpg', '10.jpg',
# '100.jpg', ...), so row i of `predictions` does NOT belong to image id i + 1.
# Recover each id from the file name that produced the prediction instead.
image_ids = [
    int(os.path.splitext(os.path.basename(name))[0])
    for name in test_generator.filenames
]

data = {'id': image_ids,
        'label':predictions}

# Order rows by numeric id so the file reads 1, 2, 3, ...
submission = pd.DataFrame(data).sort_values('id').reset_index(drop=True)

In [None]:
# Write the table to submission.csv without the DataFrame index column.
submission.to_csv('submission.csv'  , index=False)

In [None]:
# Upload submission.csv to the competition with the message 'VGG16'.
! kaggle competitions submit -c dogs-vs-cats-redux-kernels-edition -f submission.csv -m 'VGG16'