In [1]:
import os
import shutil
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2 
import keras
from keras.layers import Dense
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, EarlyStopping

In [2]:
# separate data into cat/dog files
# path = 'data/train/train/'

# if not os.path.exists('data/train/cat'):
#     os.mkdir('data/train/cat')
# if not os.path.exists('data/train/dog'):
#     os.mkdir('data/train/dog')

# for file in os.listdir(path):
#     if 'cat' in file:
#         shutil.copyfile(path + file, 'data/train/cat/' + file)
#     if 'dog' in file:
#         shutil.copyfile(path + file, 'data/train/dog/' + file)

In [3]:
# create validation
# if not os.path.exists('data/valid'):
#     os.mkdir('data/valid')
# if not os.path.exists('data/valid/cat'):
#     os.mkdir('data/valid/cat')
# if not os.path.exists('data/valid/dog'):
#     os.mkdir('data/valid/dog')

# for file in os.listdir('data/train/cat')[:1875]:
#     shutil.copyfile('data/train/cat/' + file, 'data/valid/cat/' + file)
# for file in os.listdir('data/train/dog')[:1875]:
#     shutil.copyfile('data/train/dog/' + file, 'data/valid/dog/' + file)

In [4]:
# create test 
# if not os.path.exists('data/test'):
#     os.mkdir('data/test')
# if not os.path.exists('data/test/cat'):
#     os.mkdir('data/test/cat')
# if not os.path.exists('data/test/dog'):
#     os.mkdir('data/test/dog')

# for file in os.listdir('data/train/cat')[1875:3125]:
#     shutil.copyfile('data/train/cat/' + file, 'data/test/cat/' + file)
# for file in os.listdir('data/train/dog')[1875:3125]:
#     shutil.copyfile('data/train/dog/' + file, 'data/test/dog/' + file)

In [5]:
# delete training data that were putted in valid and test 

# for file in os.listdir('data/train/cat')[:3125]:
#     os.remove('data/train/cat/' + file)
# for file in os.listdir('data/train/dog')[:3125]:
#     os.remove('data/train/dog/' + file)

In [6]:
# create image generator
batch_size = 32
size_img = 224

# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(rescale=1./255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)

validation_datagen = ImageDataGenerator(rescale=1./255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)

test_generator = ImageDataGenerator(rescale=1/255)



# Train, validation and test sets
trainset = train_datagen.flow_from_directory(os.path.join('data','train'),
                                             batch_size=batch_size, 
                                             target_size=(size_img, size_img),
                                             shuffle=True)

validset = validation_datagen.flow_from_directory(os.path.join('data','valid'),
                                             batch_size=batch_size, 
                                             target_size=(size_img, size_img),
                                             shuffle=False)

testset = test_generator.flow_from_directory(os.path.join('data','test'),
                                             batch_size=batch_size, 
                                             target_size=(size_img, size_img),
                                             shuffle=False)

Found 18750 images belonging to 2 classes.
Found 3750 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.


In [7]:
# Lets use the MobileNetV2 pre trained and set his layers as non trainable

mobilenet_model = MobileNetV2(input_shape=(224, 224, 3), include_top=False)
mobilenet_model.trainable=False

# add layers to our mobilenet to train on our current dataset

model = tf.keras.Sequential(
    [
    mobilenet_model,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Flatten(),
    
    keras.layers.Dense(64, activation="relu"),
    keras.layers.Dense(2, activation="softmax")
]
)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mobilenetv2_1.00_224 (Funct  (None, 7, 7, 1280)       2257984   
 ional)                                                          
                                                                 
 global_average_pooling2d (G  (None, 1280)             0         
 lobalAveragePooling2D)                                          
                                                                 
 flatten (Flatten)           (None, 1280)              0         
                                                                 
 dense (Dense)               (None, 64)                81984     
                                                                 
 dense_1 (Dense)             (None, 2)                 130       
                                                                 
Total params: 2,340,098
Trainable params: 82,114
Non-tra

In [8]:
# Compile our model 
if not os.path.exists('model'):
    os.mkdir('model')

model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss='categorical_crossentropy',
              metrics=["accuracy"])

# set epoch and model monitor
epochs = 15
early_stopping = ModelCheckpoint(filepath='model/catvsdog_1.h5', monitor='val_accuracy', verbose=1, save_best_only=True)

In [9]:
hist = model.fit(trainset, epochs=epochs, validation_data=validset, callbacks=early_stopping)

Epoch 1/15
Epoch 1: val_accuracy improved from -inf to 0.97413, saving model to model\catvsdog_1.h5
Epoch 2/15
Epoch 2: val_accuracy improved from 0.97413 to 0.98000, saving model to model\catvsdog_1.h5
Epoch 3/15
Epoch 3: val_accuracy did not improve from 0.98000
Epoch 4/15
Epoch 4: val_accuracy did not improve from 0.98000
Epoch 5/15
Epoch 5: val_accuracy improved from 0.98000 to 0.98187, saving model to model\catvsdog_1.h5
Epoch 6/15
Epoch 6: val_accuracy did not improve from 0.98187
Epoch 7/15
Epoch 7: val_accuracy did not improve from 0.98187
Epoch 8/15
Epoch 8: val_accuracy did not improve from 0.98187
Epoch 9/15
Epoch 9: val_accuracy did not improve from 0.98187
Epoch 10/15
Epoch 10: val_accuracy did not improve from 0.98187
Epoch 11/15
Epoch 11: val_accuracy did not improve from 0.98187
Epoch 12/15
Epoch 12: val_accuracy did not improve from 0.98187
Epoch 13/15
Epoch 13: val_accuracy did not improve from 0.98187
Epoch 14/15
Epoch 14: val_accuracy did not improve from 0.98187
Ep

In [21]:
# lets look at the score for our test set 

# first get the names to have our targets:
test_target=testset.filenames
targets = []
for t in test_target:
    if 'cat' in t:
        targets.append(0)
    else:
        targets.append(1)

targets.count(0)

1250

In [22]:
predictions = model.predict(testset)

In [27]:
labels = []
for p in predictions:
    if p[0] > p[1]:
        labels.append(0)
    else:
        labels.append(1)

In [41]:
df = pd.DataFrame()
df['filenames'] = test_target
df['targets'] = targets
df['predictions'] = labels

df.head()

Unnamed: 0,filenames,targets,predictions
0,cat\cat.11685.jpg,0,0
1,cat\cat.11686.jpg,0,0
2,cat\cat.11687.jpg,0,0
3,cat\cat.11688.jpg,0,0
4,cat\cat.11689.jpg,0,0


In [43]:
# compare the targets and predictions
error_percentage = (np.abs(df.targets - df.predictions)).sum()/2500*100

print(f"our model give us a score of 98.187% on the validationset and {100-error_percentage} % on the test set")

our model give us a score of 98.187% on the validationset and 98.4 % on the test set
