In [2]:
import tensorflow as tf
import os 

In [2]:
# !pip install tqdm

Collecting tqdm
  Using cached tqdm-4.64.0-py2.py3-none-any.whl (78 kB)
Installing collected packages: tqdm
Successfully installed tqdm-4.64.0


In [3]:
from tqdm import tqdm
from urllib import request as req

In [5]:
SOURCE_DATA_URL = "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip"

DESTINATION = "data"
data_file = "data.zip"
os.makedirs(DESTINATION,exist_ok=True)
DESTINATION_ZIP_PATH = os.path.join(DESTINATION,data_file)

In [20]:
file_name,headers=req.urlretrieve(SOURCE_DATA_URL,DESTINATION_ZIP_PATH)

In [21]:
file_name

'data\\data.zip'

In [23]:
print(headers)

Content-Type: application/octet-stream
Accept-Ranges: bytes
Server: Windows-Azure-Blob/1.0 Microsoft-HTTPAPI/2.0
x-ms-blob-content-md5: 4TekUHNw2UJGm20meiTqBA==
Last-Modified: Mon, 09 May 2022 20:42:07 GMT
ETag: "0x8DA31FC621E01C4"
Content-Length: 824887076
Date: Thu, 12 May 2022 14:02:34 GMT
Connection: close




In [24]:
from zipfile import ZipFile

with ZipFile(file_name,"r") as zip_f:
    zip_f.extractall("data/")

In [6]:
# to find any corupt img is there or not:
TARGET_DATA = ["Cat","Dog"]
PARENT_DIR = os.path.join("data","PetImages")

In [7]:
#!pip install Pillow
from PIL import Image

In [36]:
BAD_DATA_DIR = "bad_data"
os.makedirs(BAD_DATA_DIR,exist_ok=True)

In [14]:
import shutil
import numpy as np

## Basic weeding/cleaning data process

In [15]:
for dirs in os.listdir(PARENT_DIR):
    full_path_data_dir = os.path.join(PARENT_DIR, dirs)
    for imgs in os.listdir(full_path_data_dir):
        path_to_img = os.path.join(full_path_data_dir,imgs)
        try:
            img = Image.open(path_to_img)
            img.verify()
            if np.array(img).shape[-1] == 2:
                print(f"Image is verified:: {path_to_img},{np.array(img).shape}")
        except Exception as e:
            print(f"{path_to_img} is bad file,{np.array(img).shape}")
            #bad_data_path = os.path.join(BAD_DATA_DIR,imgs)
            #shutil.move(path_to_img,bad_data_path)
            pass

AttributeError: 'NoneType' object has no attribute 'seek'

In [39]:
## Arranging the image size to uniform

IMG_SIZE = (180,180)## making size to square, will do the width and height calc easier and we will not loose any data
BATCH_SIZE = 32


In [41]:
# creating training and validation dataset

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    PARENT_DIR,
    validation_split = 0.2,
    subset = "training",
    seed=42,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE
)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    PARENT_DIR,
    validation_split = 0.2,
    subset = "validation",
    seed=42,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE
)

Found 24998 files belonging to 2 classes.
Using 19999 files for training.
Found 24998 files belonging to 2 classes.
Using 4999 files for validation.


In [42]:
### Create a log dir for tensorboard logs

import time

def get_log_path(base_log_dir=os.path.join("logs","fit")):
    uniqueName = time.asctime().replace(" ", "_").replace(":", "")
    log_path = os.path.join(base_log_dir, uniqueName)
    print(f"saving logs at: {log_path}")
    return log_path

log_dir = get_log_path()

saving logs at: logs\fit\Thu_May_12_214753_2022


In [43]:
file_writer = tf.summary.create_file_writer(logdir=log_dir)

In [44]:
for images, labels in train_ds.take(1):
    print(images.shape, labels)

(32, 180, 180, 3) tf.Tensor([0 0 1 0 0 0 1 0 1 0 1 0 0 1 0 1 1 1 0 1 0 0 1 1 0 0 0 1 0 1 1 1], shape=(32,), dtype=int32)


In [45]:
images.shape

TensorShape([32, 180, 180, 3])

In [46]:
import numpy as np

with file_writer.as_default():
    images = np.array(images) ### (20, 28, 28, 1)

    tf.summary.image("samples", images.astype("uint8"), max_outputs=25, step=0)

In [48]:
AUG_STEPS = [
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1)
]

data_aug_layer = tf.keras.Sequential(AUG_STEPS)

In [50]:
## directly creating aug data
augmented_train_ds = train_ds.map(
    lambda x, y: (data_aug_layer(x, training=True), y)
)

In [51]:
train_ds = train_ds.prefetch(buffer_size=32)
val_ds = val_ds.prefetch(buffer_size=32)

In [52]:
for img, label in train_ds.take(1):
    print(img.shape)

(32, 180, 180, 3)


In [53]:
LAYERS = [
    tf.keras.layers.Input(shape=(180,180,3)),
    tf.keras.layers.Conv2D(32, (3,3), activation="relu"),
    tf.keras.layers.MaxPool2D(pool_size=(2,2)),
    tf.keras.layers.Conv2D(32, (3,3), activation="relu"),
    tf.keras.layers.MaxPool2D(pool_size=(2,2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(8, activation="relu"),
    tf.keras.layers.Dense(2, activation="softmax")
]

classifier = tf.keras.Sequential(LAYERS)

In [54]:
classifier.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 178, 178, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 89, 89, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 87, 87, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 43, 43, 32)       0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 59168)             0         
                                                                 
 dense (Dense)               (None, 8)                

In [55]:
classifier.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

In [56]:
EPOCHS = 1
classifier.fit(train_ds, epochs=EPOCHS, validation_data = val_ds)



InvalidArgumentError: Graph execution error:

Number of channels inherent in the image must be 1, 3 or 4, was 2
	 [[{{node decode_image/DecodeImage}}]]
	 [[IteratorGetNext]] [Op:__inference_train_function_1482]