In [1]:
import tensorflow as tf
import numpy as np
import os
from tqdm import tqdm
from zipfile import ZipFile

In [2]:
from PIL import Image
import shutil
import time

In [3]:
# Location of the dataset archive, relative to the notebook's working directory.
data_directory_path, data_zipfile_name = "Data", "archive.zip"
data_zipfile_path = os.path.join(data_directory_path, data_zipfile_name)

In [4]:
# Extract the dataset archive once.
# The extraction is skipped when the "training_set" folder is already present,
# so Restart & Run All works on a fresh checkout without redoing the work on
# every run.
# NOTE(review): assumes the archive unpacks a top-level "training_set" folder,
# as the path built in the following cell suggests -- confirm against the archive.
extracted_marker_dir = os.path.join(data_directory_path, "training_set")
if not os.path.isdir(extracted_marker_dir):
    with ZipFile(data_zipfile_path, "r") as zip_f:
        # Extract next to the archive instead of a hard-coded "Data/" literal.
        zip_f.extractall(data_directory_path)

In [5]:
# Presumably the names of the class sub-folders -- verify against later usage.
target_data_dirs = ["cats", "dogs"]

# The images live two levels down: <data dir>/training_set/training_set/.
parent_directory = os.path.join(data_directory_path, "training_set", "training_set")
print(parent_directory)

Data\training_set\training_set


In [6]:
# Quarantine folder for image files that fail verification.
# It is created up front; exist_ok keeps the cell safe to re-run.
BAD_DATA_DIR = "Bad_data"
os.makedirs(BAD_DATA_DIR, exist_ok=True)

In [7]:
# Verify every image inside each class folder and quarantine unreadable files.
#
# Each file is opened with Pillow and checked with `verify()`. Files that fail
# are moved into BAD_DATA_DIR so the dataset loader, which reads from
# parent_directory, never encounters them.

for dirs in os.listdir(parent_directory):
    full_path_data_dirs = os.path.join(parent_directory, dirs)
    # Skip stray files sitting next to the class folders; calling os.listdir
    # on them would raise.
    if not os.path.isdir(full_path_data_dirs):
        continue

    for img in os.listdir(full_path_data_dirs):
        full_path_image = os.path.join(full_path_data_dirs, img)
        # Only regular files are image candidates; a nested folder must not be
        # mistaken for a corrupt image and moved away.
        if not os.path.isfile(full_path_image):
            continue

        try:
            # The context manager releases the file handle even when verify()
            # raises. Without it the handle can still be open when the file is
            # moved below, which fails on Windows.
            with Image.open(full_path_image) as image:
                image.verify()
        except Exception as e:
            # Best-effort check: any failure to open/verify marks the file as bad.
            # Prefix with the class folder name so same-named files from
            # different classes do not overwrite each other in BAD_DATA_DIR.
            bad_data_path = os.path.join(BAD_DATA_DIR, f"{dirs}_{img}")
            print(f"{full_path_image} --> is BAD ({e!r}); moving to {bad_data_path}")
            shutil.move(full_path_image, bad_data_path)
            

In [8]:
# Number of images per batch produced by the dataset loader.
BATCH_SIZE = 32
# Target size passed to the dataset loader as `image_size`.
IMG_SIZE = (224, 224)

In [9]:
# Build the training and validation datasets from the folders under
# parent_directory. image_dataset_from_directory splits the data and resizes
# the images to IMG_SIZE, batching them in groups of BATCH_SIZE.
# Both calls share the same split fraction and seed; only `subset` differs.

shared_loader_options = {
    "validation_split": 0.2,
    "seed": 13,
    "image_size": IMG_SIZE,
    "batch_size": BATCH_SIZE,
}

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    parent_directory,
    subset="training",
    **shared_loader_options,
)

valid_ds = tf.keras.preprocessing.image_dataset_from_directory(
    parent_directory,
    subset="validation",
    **shared_loader_options,
)


Found 8005 files belonging to 2 classes.
Using 6404 files for training.
Found 8005 files belonging to 2 classes.
Using 1601 files for validation.


In [10]:
# Create a timestamped name for the log directory on every call

def get_log_path(base_dir = os.path.join("log_CNN_example", "fit")):
    """Return a timestamped log directory path under ``base_dir``.

    The sub-directory name is the current ``time.asctime()`` string with
    spaces turned into underscores and colons removed. The resulting path is
    also printed. The directory itself is not created here.

    Args:
        base_dir: Parent directory for the run-specific log folder.

    Returns:
        ``base_dir`` joined with the timestamp-derived folder name.
    """
    # One translation pass: " " -> "_" and ":" -> deleted.
    stamp_table = str.maketrans({" ": "_", ":": None})
    run_stamp = time.asctime().translate(stamp_table)
    log_path = os.path.join(base_dir, run_stamp)
    print(f"saving logs at: {log_path}")
    return log_path

In [11]:
# Pick the log directory for this run (default base: log_CNN_example/fit).
log_dir = get_log_path()

saving logs at: log_CNN_example\fit\Thu_May__9_184649_2024


In [12]:
# Pull a single batch from train_ds to inspect it.
# Each element of train_ds is an (images, labels) pair; the labels are derived
# from the class folder names.
# .take(1) limits the iteration to one batch (BATCH_SIZE = 32 images).
# The loop variables `imgs` and `labels` stay bound after the loop and are
# reused by later cells.

for imgs, labels in train_ds.take(1):
    print(imgs.shape, labels )

(32, 224, 224, 3) tf.Tensor([0 1 1 0 1 1 0 0 0 0 1 1 0 1 0 1 0 1 1 0 0 1 1 1 1 1 0 0 1 0 1 1], shape=(32,), dtype=int32)


In [13]:
# Show the shape of the batch sampled from train_ds in the previous cell.
imgs.shape

TensorShape([32, 224, 224, 3])

In [14]:
# Create a summary writer that logs into log_dir, so the sample images can be
# viewed in TensorBoard.
file_writer = tf.summary.create_file_writer(log_dir)

In [15]:
# Log up to 10 images from the sampled batch to TensorBoard under the tag
# "samples".
with file_writer.as_default():
    # Convert the batch to a NumPy array so it can be cast with astype().
    images = np.array(imgs)

    # NOTE(review): presumably the loader yields float pixel values in
    # [0, 255]; casting to uint8 lets TensorBoard render them as-is -- confirm.
    tf.summary.image("samples", images.astype("uint8"), max_outputs = 10, step = 0)

# Summary events are buffered by the writer. Flush explicitly so the images
# are on disk (and visible in TensorBoard) as soon as this cell finishes.
file_writer.flush()

In [16]:
# Data augmentation.
# One option is tf.keras.preprocessing.image.ImageDataGenerator();
# the approach used here is to add augmentation layers instead.

AUG_STEPS = [
    tf.keras.layers.RandomFlip(mode="horizontal"),
    tf.keras.layers.RandomRotation(factor=0.1),
]

# Chain the augmentation steps into a single reusable layer stack.
data_aug_layer = tf.keras.Sequential(layers=AUG_STEPS)