# Image Recognition (Dogs&Cats Excercise)

## The goal of this excercise is to develop and tune a neural network that would be able to recognize dogs and cats on pictures using limited data.

## Creating a smaller dataset of 1000 dogs images, 1000 cats images for training; 500 dog and 500 cat images for validation; and 500 dog and 500 cat images for testing, with total of 2000 training samples, 1000 validation samples, and 1000 test samples.

In [None]:
import os, shutil

original_dataset_dir = "data/full_data/train"

base_dir = "data/partial_data"
os.mkdir(base_dir)

# creates folders for training, validation, and testing data
train_dir = os.path.join(base_dir, "train")
os.mkdir(train_dir)
validation_dir = os.path.join(base_dir, "validation")
os.mkdir(validation_dir)
test_dir = os.path.join(base_dir, "test")
os.mkdir(test_dir)

# creates individual folders for cats and for dogs for each category: training samples, validation samples, and test samples
# this is for train samples
train_cats_dir = os.path.join(train_dir, "train_cats")
os.mkdir(train_cats_dir)
train_dogs_dir = os.path.join(train_dir, "train_dogs")
os.mkdir(train_dogs_dir)

# this is for validation samples
validation_cats_dir = os.path.join(validation_dir, "validation_cats")
os.mkdir(validation_cats_dir)
validation_dogs_dir = os.path.join(validation_dir, "validation_dogs")
os.mkdir(validation_dogs_dir)

# thus us for test samples
test_cats_dir = os.path.join(test_dir, "test_cats")
os.mkdir(test_cats_dir)
test_dogs_dir = os.path.join(test_dir, "test_dogs")
os.mkdir(test_dogs_dir)

# copies first 1000 cats images into train_cats folder
fnames = ["cat.{}.jpg".format(i) for i in range(1000)]
for filename in fnames:
    src = os.path.join(original_dataset_dir, filename)
    dst = os.path.join(train_cats_dir, filename)
    shutil.copyfile(src, dst)

# copies next 500 pictures of cats into validation_cats folder
fnames = ["cat.{}.jpg".format(i) for i in range(1000, 1500)]
for filename in fnames:
    src = os.path.join(original_dataset_dir, filename)
    dst = os.path.join(validation_cats_dir, filename)
    shutil.copyfile(src, dst)

# copies next cats images into test_cats folder
fnames = ["cat.{}.jpg".format(i) for i in range(1500, 2000)]
for filename in fnames:
    src = os.path.join(original_dataset_dir, filename)
    dst = os.path.join(test_cats_dir, filename)
    shutil.copyfile(src, dst)

# copies first 1000 dogs images into train_cats folder
fnames = ["dog.{}.jpg".format(i) for i in range(1000)]
for filename in fnames:
    src = os.path.join(original_dataset_dir, filename)
    dst = os.path.join(train_dogs_dir, filename)
    shutil.copyfile(src, dst)

# copies next 500 pictures of dog into validation_cats folder
fnames = ["dog.{}.jpg".format(i) for i in range(1000, 1500)]
for filename in fnames:
    src = os.path.join(original_dataset_dir, filename)
    dst = os.path.join(validation_dogs_dir, filename)
    shutil.copyfile(src, dst)

# copies next 500 dogs images into test_cats folder
fnames = ["dog.{}.jpg".format(i) for i in range(1500, 2000)]
for filename in fnames:
    src = os.path.join(original_dataset_dir, filename)
    dst = os.path.join(test_dogs_dir, filename)
    shutil.copyfile(src, dst)


## Here I build a model to train

In [None]:
from tensorflow.keras import models, layers, optimizers

model = models.Sequential()

model.add(layers.Conv2D(32, (3, 3), activation="relu", input_shape=(150, 150, 3)))
model.add(layers.MaxPool2D(2, 2))
model.add(layers.Conv2D(64, (3, 3), activation="relu"))
model.add(layers.MaxPool2D(2, 2))
model.add(layers.Conv2D(128, (3, 3), activation="relu"))
model.add(layers.MaxPool2D(2, 2))
model.add(layers.Conv2D(128, (3, 3), activation="relu"))
model.add(layers.MaxPool2D(2, 2))
model.add(layers.Flatten())
model.add(layers.Dense(512, activation="relu"))
model.add(layers.Dense(1, activation="sigmoid"))

model.compile(
    optimizer=optimizers.RMSprop(learning_rate=1e-4),
    loss="binary_crossentropy",
    metrics=["acc"],
)
print(model.summary())


## Here I preprocess the image data: convert it into a numpy array.

In [1]:
import json
import numpy as np

from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    "data/partial_data/train",
    target_size=(150, 150),
    class_mode="binary",
)

jsnfile = open(os.path.join("data\\partial_data\\train", "processed_training_data.json"))

x_train = np.zeros((1, 150, 150, 3))
y_targets = np.zeros((1))

for data_batch, labels_batch in train_generator:

    x_train = np.append(x_train, data_batch, 0)
    y_targets = np.append(y_targets, labels_batch, 0)


json.dump((x_train, y_targets))


Found 2000 images belonging to 2 classes.


MemoryError: Unable to allocate 808. MiB for an array with shape (1569, 150, 150, 3) and data type float64