In [2]:
import os
import zipfile
import random
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile

In [4]:
# Download the kagglecatsanddogs_3367a.zip archive and save it as
# /tmp/cats-and-dogs.zip.
# --no-check-certificate tells wget not to validate the server's TLS certificate.
# NOTE(review): the file is written under /tmp, while the extraction cell opens
# '../tmp/cats-and-dogs.zip'; those are the same file only if the notebook's
# working directory sits directly below the filesystem root -- confirm.
!wget --no-check-certificate \
    "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip" \
    -O "/tmp/cats-and-dogs.zip"
    

--2020-12-22 02:52:51--  https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip
Resolving download.microsoft.com...2.21.128.137
Connecting to download.microsoft.com|2.21.128.137|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 824894548 (787M) [application/octet-stream]
Saving to: '/tmp/cats-and-dogs.zip'

/tmp/cats-and-dogs.   5%[>                   ]  40.71M  3.06MB/s    eta 4m 4s  ^C


In [5]:
# Unpack the downloaded archive into ../tmp. The context manager closes the
# zip handle even when extraction fails part-way through (e.g. a truncated
# download), instead of leaking it as the explicit open()/close() pair did.
local_zip = '../tmp/cats-and-dogs.zip'
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('../tmp')

# Sanity check: number of directory entries extracted for each class.
print(len(os.listdir('../tmp/PetImages/Cat/')))
print(len(os.listdir('../tmp/PetImages/Dog/')))

12501
12501


In [6]:
# Create the cats-v-dogs directory tree: 'training' and 'testing', each with
# 'cats' and 'dogs' subdirectories.
#
# os.makedirs(..., exist_ok=True) creates any missing parent directories and
# is a no-op for directories that already exist. Unlike a single try/except
# around a chain of os.mkdir calls, one pre-existing directory can no longer
# cause all the remaining directories to be skipped silently, and genuine
# failures (e.g. permission errors) are reported instead of swallowed.
for leaf_dir in (
    '../tmp/cats-v-dogs/training/cats',
    '../tmp/cats-v-dogs/training/dogs',
    '../tmp/cats-v-dogs/testing/cats',
    '../tmp/cats-v-dogs/testing/dogs',
):
    os.makedirs(leaf_dir, exist_ok=True)

## Write a python function called split_data which takes
1. a SOURCE directory containing the files
2. a TRAINING directory that a portion of the files will be copied to
3. a TESTING directory that a portion of the files will be copied to
4. a SPLIT SIZE to determine the portion
5. The files should also be randomized, so that the training set is a random X% of the files, and the test set is the remaining files.
6. So, for example, if SOURCE is PetImages/Cat and SPLIT SIZE is .9, then 90% of the images in PetImages/Cat will be copied to the TRAINING dir and 10% of the images will be copied to the TESTING dir.
7. Also -- all images should be checked, and if they have a zero file length, they will not be copied over.

### os.listdir(DIRECTORY) gives you a listing of the contents of that directory
### os.path.getsize(PATH) gives you the size of the file
### copyfile(source, destination) copies a file from source to destination
### random.sample(list, len(list)) shuffles a list

In [7]:

def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    """Randomly split the non-empty files of SOURCE between TRAINING and TESTING.

    Args:
        SOURCE: Directory containing the files to split.
        TRAINING: Existing directory that receives the first part of the
            shuffled files.
        TESTING: Existing directory that receives the remaining files.
        SPLIT_SIZE: Fraction (0..1) of the usable files copied to TRAINING;
            the rest go to TESTING.

    Zero-length files are reported on stdout and not copied. Files are copied
    (not moved), and nothing already present in TRAINING or TESTING is
    removed, so calling this repeatedly on the same destinations accumulates
    files from different random splits.
    """
    usable_names = []
    for name in os.listdir(SOURCE):
        path = os.path.join(SOURCE, name)
        # Sub-directories and other non-file entries cannot be copied with
        # copyfile, so leave them out of the split.
        if not os.path.isfile(path):
            continue
        if os.path.getsize(path) == 0:
            print("Zero Size file:", name)
        else:
            usable_names.append(name)

    # random.sample over the full length returns a shuffled copy of the list.
    shuffled_names = random.sample(usable_names, len(usable_names))

    # Honour the requested ratio rather than a hard-coded 0.9.
    cut = int(SPLIT_SIZE * len(shuffled_names))

    for name in shuffled_names[:cut]:
        copyfile(os.path.join(SOURCE, name), os.path.join(TRAINING, name))
    for name in shuffled_names[cut:]:
        copyfile(os.path.join(SOURCE, name), os.path.join(TESTING, name))
    
# Source folders holding the extracted images for each class.
CAT_SOURCE_DIR = "../tmp/PetImages/Cat/"
DOG_SOURCE_DIR = "../tmp/PetImages/Dog/"

# Destination folders for the split, per subset and class.
TRAINING_CATS_DIR = "../tmp/cats-v-dogs/training/cats/"
TRAINING_DOGS_DIR = "../tmp/cats-v-dogs/training/dogs/"
TESTING_CATS_DIR = "../tmp/cats-v-dogs/testing/cats/"
TESTING_DOGS_DIR = "../tmp/cats-v-dogs/testing/dogs/"

# Value handed to split_data as its SPLIT_SIZE argument.
split_size = .9

# Cats are split first, then dogs.
for source_dir, training_dir, testing_dir in (
    (CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR),
    (DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR),
):
    split_data(source_dir, training_dir, testing_dir, split_size)



Zero Size file: 666.jpg
Zero Size file: 11702.jpg


In [8]:
# Print the number of entries in each split folder:
# training cats, training dogs, testing cats, testing dogs.
for split_dir in (
    '../tmp/cats-v-dogs/training/cats/',
    '../tmp/cats-v-dogs/training/dogs/',
    '../tmp/cats-v-dogs/testing/cats/',
    '../tmp/cats-v-dogs/testing/dogs/',
):
    print(len(os.listdir(split_dir)))

12399
12389
2399
2389


In [9]:
# DEFINE A KERAS MODEL TO CLASSIFY CATS V DOGS
# USE AT LEAST 3 CONVOLUTION LAYERS
#
# Three Conv2D + max-pooling stages (16, 32, 64 filters) on 150x150 RGB input,
# followed by a 512-unit dense layer and a single sigmoid output unit for the
# binary cat/dog decision.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPool2D(2, 2),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPool2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPool2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# `learning_rate` is the supported argument name; the older `lr` alias is
# deprecated and is rejected by newer Keras releases.
model.compile(
    optimizer=RMSprop(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy']
    )

In [10]:
# Print the per-layer table of output shapes and parameter counts for `model`.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 148, 148, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 74, 74, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 72, 72, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 36, 36, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 34, 34, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 17, 17, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 18496)             0

In [11]:
# Training images: pixel values are scaled by 1/255, images are resized to
# 150x150, and batches of 20 are yielded with binary labels.
TRAINING_DIR = '../tmp/cats-v-dogs/training'
train_datagen = ImageDataGenerator(rescale=1.0 / 255)
train_generator = train_datagen.flow_from_directory(
    directory=TRAINING_DIR,
    batch_size=20,
    class_mode='binary',
    target_size=(150, 150),
)

# Validation images: same scaling, size and batch settings, read from the
# 'testing' folder.
VALIDATION_DIR = '../tmp/cats-v-dogs/testing'
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)
validation_generator = validation_datagen.flow_from_directory(
    directory=VALIDATION_DIR,
    batch_size=20,
    class_mode='binary',
    target_size=(150, 150),
)


Found 24786 images belonging to 2 classes.
Found 4788 images belonging to 2 classes.


In [12]:
# Train for 15 epochs on the training generator, evaluating on the validation
# generator; the returned History object is kept for the plots.
history = model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=15,
    verbose=1,
)

# The expectation here is that the model will train, and that accuracy will be > 95% on both training and validation
# i.e. acc:A1 and val_acc:A2 will be visible, and both A1 and A2 will be > .9

Epoch 1/15

KeyboardInterrupt: 

In [None]:
import matplotlib.image  as mpimg
import matplotlib.pyplot as plt

#-----------------------------------------------------------
# Retrieve the per-epoch metric lists recorded by model.fit
# for the training and validation sets
#-----------------------------------------------------------
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))  # One x-axis position per recorded epoch

#------------------------------------------------
# Plot training and validation accuracy per epoch
#------------------------------------------------
# The legend text must be passed as label=...; as a bare fourth positional
# argument matplotlib treats the string as extra data to plot, not a label.
plt.plot(epochs, acc, 'r', label="Training Accuracy")
plt.plot(epochs, val_acc, 'b', label="Validation Accuracy")
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.legend()

# Start a second figure so the loss curves do not share the accuracy axes.
plt.figure()

#------------------------------------------------
# Plot training and validation loss per epoch
#------------------------------------------------
plt.plot(epochs, loss, 'r', label="Training Loss")
plt.plot(epochs, val_loss, 'b', label="Validation Loss")
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.legend()

# Desired output. Charts with training and validation metrics. No crash :)