In [9]:
# One-time dataset preparation: build a small subset of the images in its own directory tree.
import os, shutil, pathlib
# Directory holding the full image collection (one sub-folder per class).
original_dir = pathlib.Path("./../data/PetImages")
# Root under which the train/validation/test subsets are written.
# Note that this directory is nested inside original_dir.
new_basedir = pathlib.Path("./../data/PetImages/cat_vs_dog_small")

def make_subset(subset_name, start_index, end_index):
    """Copy a contiguous range of images of each class into a subset folder.

    For every category ("cat" and "dog") the files ``{i}.jpg`` with
    ``start_index <= i < end_index`` are copied from the category folder
    under ``original_dir`` to ``new_basedir / subset_name / category``.
    Destination folders are created on demand and existing files are
    overwritten, so the function can be re-run safely.

    The source folder is looked up by exact name first. If that folder does
    not exist, a folder whose name matches case-insensitively (for example
    ``Cat`` for ``cat``) is used instead, so the function also works on
    case-sensitive filesystems.

    Args:
        subset_name: Name of the subset folder, e.g. "train".
        start_index: First image index to copy (inclusive).
        end_index: Image index at which to stop (exclusive).

    Raises:
        FileNotFoundError: If one of the expected source images is missing.
    """
    for category in ("cat", "dog"):
        src_dir = original_dir / category
        if not src_dir.is_dir() and original_dir.is_dir():
            # Fall back to a case-insensitive match; sorted() keeps the
            # choice deterministic if several folders differ only in case.
            for candidate in sorted(original_dir.iterdir()):
                if candidate.is_dir() and candidate.name.lower() == category:
                    src_dir = candidate
                    break

        newdir = new_basedir / subset_name / category
        os.makedirs(newdir, exist_ok=True)
        for i in range(start_index, end_index):
            fname = f"{i}.jpg"
            shutil.copyfile(src=src_dir / fname, dst=newdir / fname)

# Inspect which entries exist directly under original_dir.
os.listdir(original_dir)

['Cat', 'cat_vs_dogs_small', 'Dog', 'cat_vs_dog_small']

In [11]:
# Index ranges per split (end exclusive); make_subset applies each range to every class.
for subset_name, (start, stop) in (
    ("train", (0, 1000)),
    ("validation", (1000, 1500)),
    ("test", (1500, 2500)),
):
    make_subset(subset_name, start_index=start, end_index=stop)

In [56]:
from tensorflow import keras
from tensorflow.keras import layers

# Small convnet for binary classification of 180x180 RGB images.
inputs = keras.Input(shape=(180, 180, 3))
# Scale raw pixel values from [0, 255] down to [0, 1].
x = layers.Rescaling(1./255)(inputs)
# Four conv + max-pool stages with a growing number of filters.
for n_filters in (32, 64, 128, 256):
    x = layers.Conv2D(filters=n_filters, kernel_size=3, activation="relu")(x)
    x = layers.MaxPooling2D(pool_size=2)(x)
# Final convolution is not followed by pooling.
x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
x = layers.Flatten()(x)
# Single sigmoid unit: the output is one probability per image.
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

In [57]:
# Print each layer with its output shape and parameter count.
model.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 180, 180, 3)]     0         
                                                                 
 rescaling_4 (Rescaling)     (None, 180, 180, 3)       0         
                                                                 
 conv2d_20 (Conv2D)          (None, 178, 178, 32)      896       
                                                                 
 max_pooling2d_16 (MaxPooli  (None, 89, 89, 32)        0         
 ng2D)                                                           
                                                                 
 conv2d_21 (Conv2D)          (None, 87, 87, 64)        18496     
                                                                 
 max_pooling2d_17 (MaxPooli  (None, 43, 43, 64)        0         
 ng2D)                                                     

In [58]:
# Binary cross-entropy pairs with the model's single sigmoid output; accuracy is reported as a metric.
model.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=["accuracy"])

In [59]:
from tensorflow.keras.utils import image_dataset_from_directory
# Every split is loaded the same way: images resized to 180x180, batches of 32.
loader_options = dict(image_size=(180, 180), batch_size=32)

train_dataset = image_dataset_from_directory(new_basedir / "train", **loader_options)
val_dataset = image_dataset_from_directory(new_basedir / "validation", **loader_options)
test_dataset = image_dataset_from_directory(new_basedir / "test", **loader_options)

Found 1999 files belonging to 2 classes.
Found 1000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.


In [60]:
# Wrap each split so that elements whose loading raises an error are skipped
# instead of aborting iteration.
clean_train_dataset, clean_val_dataset, clean_test_dataset = (
    split.ignore_errors() for split in (train_dataset, val_dataset, test_dataset)
)


In [64]:
# Keep only the best model seen so far, judged by validation loss.
callbacks = [
    keras.callbacks.ModelCheckpoint(
        "convnet_from_scratch", monitor="val_loss", save_best_only=True
    )
]
# Train for 30 epochs, evaluating on the validation split after each one.
history = model.fit(
    x=clean_train_dataset,
    validation_data=clean_val_dataset,
    epochs=30,
    callbacks=callbacks,
)

Epoch 1/30
     63/Unknown - 3s 39ms/step - loss: 0.5618 - accuracy: 0.7224

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


INFO:tensorflow:Assets written to: convnet_from_scratch/assets


INFO:tensorflow:Assets written to: convnet_from_scratch/assets


Epoch 2/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 3/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 4/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


INFO:tensorflow:Assets written to: convnet_from_scratch/assets


INFO:tensorflow:Assets written to: convnet_from_scratch/assets


Epoch 5/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 6/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 7/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 8/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 9/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 10/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 11/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 12/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 13/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 14/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 15/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 16/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 17/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 18/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 19/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 20/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 21/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 22/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 23/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 24/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 25/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 26/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 27/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 28/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 29/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


Epoch 30/30

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9




In [50]:
# Walk through the validation batches and print, one item per line: the batch
# index, a running sample offset (index * 32) and the image / label tensor shapes.
batch = None
for i, (db, lb) in enumerate(clean_val_dataset):
    offset = i*32
    print(i, f"CHECK:  {offset}", db.shape, lb.shape, sep="\n")

0
CHECK:  0
(32, 180, 180, 3)
(32,)
1
CHECK:  32
(32, 180, 180, 3)
(32,)
2
CHECK:  64
(32, 180, 180, 3)
(32,)
3
CHECK:  96
(32, 180, 180, 3)
(32,)
4
CHECK:  128
(32, 180, 180, 3)
(32,)
5
CHECK:  160
(32, 180, 180, 3)
(32,)
6
CHECK:  192
(32, 180, 180, 3)
(32,)
7
CHECK:  224
(32, 180, 180, 3)
(32,)
8
CHECK:  256
(32, 180, 180, 3)
(32,)
9
CHECK:  288
(32, 180, 180, 3)
(32,)
10
CHECK:  320
(32, 180, 180, 3)
(32,)
11
CHECK:  352
(32, 180, 180, 3)
(32,)
12
CHECK:  384
(32, 180, 180, 3)
(32,)
13
CHECK:  416
(32, 180, 180, 3)
(32,)
14
CHECK:  448
(32, 180, 180, 3)
(32,)
15
CHECK:  480
(32, 180, 180, 3)
(32,)
16
CHECK:  512
(32, 180, 180, 3)
(32,)
17
CHECK:  544
(32, 180, 180, 3)
(32,)
18
CHECK:  576
(32, 180, 180, 3)
(32,)
19
CHECK:  608
(32, 180, 180, 3)
(32,)
20
CHECK:  640
(32, 180, 180, 3)
(32,)
21
CHECK:  672
(32, 180, 180, 3)
(32,)
22
CHECK:  704
(32, 180, 180, 3)
(32,)
23
CHECK:  736
(32, 180, 180, 3)
(32,)
24
CHECK:  768
(32, 180, 180, 3)
(32,)
25
CHECK:  800
(32, 180, 180, 3)
(32,)
2

Corrupt JPEG data: 214 extraneous bytes before marker 0xd9


In [52]:
import os
from PIL import Image

def validate_jpeg_files(directory):
    """Recursively check every JPEG under ``directory`` and report the unreadable ones.

    Files are selected by extension (``.jpg`` / ``.jpeg``, case-insensitive).
    Each selected file is opened twice: once to run ``verify()`` and once to
    ``load()`` the pixel data. A file is reported only if one of these steps
    raises an exception.

    Args:
        directory: Root directory to scan.

    Returns:
        A list of dicts with the keys ``'path'`` (file path) and ``'error'``
        (the exception message), one per file that failed. The list is empty
        when nothing failed.
    """
    jpeg_suffixes = ('.jpg', '.jpeg')
    corrupt_files = []
    for root, _, files in os.walk(directory):
        jpeg_paths = [
            os.path.join(root, name)
            for name in files
            if name.lower().endswith(jpeg_suffixes)
        ]
        for filepath in jpeg_paths:
            try:
                # First pass: integrity check.
                with Image.open(filepath) as img:
                    img.verify()
                # Second pass on a fresh handle: actually decode the image data.
                with Image.open(filepath) as img:
                    img.load()
            except Exception as exc:
                corrupt_files.append({'path': filepath, 'error': str(exc)})
    return corrupt_files

In [55]:
# Scan the whole subset tree; the result lists only files for which opening/loading raised.
# NOTE(review): the empty result alongside the "Corrupt JPEG data" message seen during
# training suggests that message is a decoder warning rather than an error — confirm.
validate_jpeg_files("./../data/PetImages/cat_vs_dog_small")

[]