In [11]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dogs-vs-cats/test1.zip
/kaggle/input/dogs-vs-cats/train.zip
/kaggle/input/dogs-vs-cats/sampleSubmission.csv


In [12]:
# Create the project folder (no error if it is already there)
import os

project_dir = '/kaggle/working/dogs-vs-cats'
os.makedirs(project_dir, exist_ok=True)


I created a directory named `dogs-vs-cats` inside Kaggle's working directory. Setting `exist_ok=True` ensures no error is raised if the folder already exists.

In [13]:
# Extract every member of train.zip into the project folder
import zipfile

train_zip_path = '/kaggle/input/dogs-vs-cats/train.zip'
train_extract_dir = '/kaggle/working/dogs-vs-cats'

with zipfile.ZipFile(train_zip_path, mode='r') as archive:
    archive.extractall(path=train_extract_dir)


I opened `train.zip` from Kaggle's input directory and extracted all training images into my working directory.

In [14]:
# Show what the project folder contains after extracting train.zip
os.listdir('/kaggle/working/dogs-vs-cats')


['train']

In [15]:
# Peek at the first five entries of the extracted train folder
# (os.listdir returns entries in arbitrary order)
os.listdir('/kaggle/working/dogs-vs-cats/train')[:5]


['cat.6426.jpg',
 'dog.5341.jpg',
 'dog.12433.jpg',
 'cat.1450.jpg',
 'cat.8978.jpg']

In [16]:
# One sub-folder per class inside the extracted train folder
base_dir = '/kaggle/working/dogs-vs-cats/train'

for class_subdir in ('/cats', '/dogs'):
    # exist_ok=True keeps this cell safe to re-run
    os.makedirs(base_dir + class_subdir, exist_ok=True)


I created one subdirectory per class (`cats/` and `dogs/`) inside the train folder.

In [None]:
# Sort the extracted images into their class folders by filename prefix
import shutil

# filename prefix -> destination sub-folder (relative to base_dir)
prefix_to_subdir = {'cat': '/cats/', 'dog': '/dogs/'}

for entry in os.listdir(base_dir):
    source = os.path.join(base_dir, entry)

    # Directories found in base_dir (e.g. the class folders) are left where they are.
    if not os.path.isdir(source):
        matching_subdir = next(
            (subdir for prefix, subdir in prefix_to_subdir.items()
             if entry.startswith(prefix)),
            None,
        )
        # Files that match neither prefix stay in place.
        if matching_subdir is not None:
            shutil.move(source, base_dir + matching_subdir + entry)


This loops through all files in `train/` and moves `cat*.jpg` -> `cats/` and `dog*.jpg` -> `dogs/`.

In [None]:
# List the train folder again now that the move loop has run
os.listdir('/kaggle/working/dogs-vs-cats/train')


In [None]:
# Unzip the test1.zip file into its own test folder
import zipfile, os

test_extract_dir = '/kaggle/working/dogs-vs-cats/test'
os.makedirs(test_extract_dir, exist_ok=True)

with zipfile.ZipFile('/kaggle/input/dogs-vs-cats/test1.zip', mode='r') as archive:
    archive.extractall(path=test_extract_dir)


I created a test directory and extracted the test images, which are unlabeled.

In [None]:
# Build the training and validation datasets from the class folders
import tensorflow as tf

DATA_DIR = '/kaggle/working/dogs-vs-cats/train'

# Options common to both subsets: same image size, label settings,
# split fraction and seed for the training and the validation call.
shared_options = dict(
    image_size=(256, 256),
    labels='inferred',
    label_mode='int',
    validation_split=0.2,
    seed=42,
)

# Training subset, batches of 64
train_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR, subset='training', batch_size=64, **shared_options
)

# Validation subset, batches of 32
val_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR, subset='validation', batch_size=32, **shared_options
)


In [None]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import keras
from keras import Sequential
from keras.layers import Conv2D , MaxPooling2D , GlobalAveragePooling2D , Dense , BatchNormalization , Dropout

In [None]:
# Preprocessing layers that live inside the model:
# resize to IMG_SIZE x IMG_SIZE, then multiply pixel values by 1/255.
IMG_SIZE = 160

resize_and_rescale = tf.keras.Sequential([
    layers.Resizing(IMG_SIZE, IMG_SIZE),
    layers.Rescaling(1./255),
])

# Random augmentation layers, applied after resizing/rescaling.
# NOTE(review): RandomWidth / RandomHeight may not be available in every Keras
# release — confirm against the version installed in this environment.
augmentation_layers = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomWidth(0.1),
    layers.RandomHeight(0.1),
]
data_augmentation = tf.keras.Sequential(augmentation_layers)


def _conv_block(filters):
    """Return one conv stage: 3x3 same-padded ReLU Conv2D, BatchNormalization, MaxPooling2D."""
    return [
        Conv2D(filters, (3, 3), padding='same', activation='relu'),
        BatchNormalization(),
        MaxPooling2D(),
    ]


# Full model: 256x256 RGB input -> preprocessing -> four conv stages
# (32, 64, 128, 256 filters) -> global average pooling -> dense head
# ending in a single sigmoid unit.
model = Sequential([
    tf.keras.Input(shape=(256, 256, 3)),
    resize_and_rescale,
    data_augmentation,

    *_conv_block(32),
    *_conv_block(64),
    *_conv_block(128),
    *_conv_block(256),

    GlobalAveragePooling2D(),

    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Callbacks: both watch the validation loss
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=3, restore_best_weights=True
)
reduce_lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.2, patience=2
)
callbacks = [early_stopping, reduce_lr_on_plateau]

# Adam with a lowered learning rate; binary cross-entropy matches the
# single sigmoid output unit of the model.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)


In [None]:
# Print the layer-by-layer summary of the model defined above
model.summary()

In [None]:
import time

# Train for up to 20 epochs (the callbacks may act earlier) and report
# the wall-clock duration of the fit call.
start = time.time()
history = model.fit(train_ds, epochs=20, validation_data=val_ds, callbacks=callbacks)
end = time.time()

print(f"Training time: {end - start:.2f} seconds")


By increasing CNN depth and carefully controlling regularization and learning rate, I improved validation accuracy from ~70% to ~85% while maintaining good generalization, using a CNN trained entirely from scratch.

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch, read from the History object
# returned by model.fit. Title and axis labels are set so the figure can be
# understood on its own.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], color='red', label='train')
ax.plot(history.history['val_accuracy'], color='blue', label='validation')
ax.set(title='Accuracy per epoch', xlabel='Epoch', ylabel='Accuracy')
ax.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch, read from the History object
# returned by model.fit. Title and axis labels are set so the figure can be
# understood on its own.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], color='red', label='train')
ax.plot(history.history['val_loss'], color='blue', label='validation')
ax.set(title='Loss per epoch', xlabel='Epoch', ylabel='Loss')
ax.legend()
plt.show()

# Prediction on test data

In [None]:
# Settings for running inference on the test images.
TEST_DIR = "/kaggle/working/dogs-vs-cats/test/test1"
# NOTE: 160 is the target size of the Resizing layer inside the model. The
# training/validation datasets and the model's Input layer use 256x256 images.
IMG_SIZE = 160     # same value as the model's internal Resizing target
BATCH_SIZE = 32


In [None]:
# Load the test images at 256x256: that is the shape the model's Input layer
# declares and the size the training/validation datasets were built with.
# The model shrinks images to IMG_SIZE itself through its Resizing layer, so
# loading at IMG_SIZE here would feed batches that do not match the model's
# declared input shape.
test_ds = tf.keras.utils.image_dataset_from_directory(
    TEST_DIR,
    image_size=(256, 256),
    batch_size=BATCH_SIZE,
    shuffle=False,      # fixed file order, so predictions can be matched back to files
    labels=None         # unlabeled images: the dataset yields image batches only
)


In [None]:
import os

# With shuffle=False, image_dataset_from_directory yields files in
# alphanumeric path order ("1.jpg", "10.jpg", "100.jpg", ...), NOT in numeric
# id order. Sorting the directory listing numerically would therefore pair
# every prediction with the wrong image. Take the file list from the dataset
# itself so image_files[i] / image_ids[i] describe the i-th prediction row.
image_files = [os.path.basename(path) for path in test_ds.file_paths]

# The numeric part before the first "." is used as the image id
# (assumes test files are named "<id>.jpg").
image_ids = [int(name.split(".")[0]) for name in image_files]


In [None]:
# Run the model over the whole test set, then threshold the flattened
# outputs at 0.5 to obtain a 1-D array of 0/1 labels.
predictions = model.predict(test_ds)
predicted_labels = (predictions.ravel() > 0.5).astype(int)


In [None]:
# Inference over the full test set already ran in the previous cell. Reuse
# its `predictions` instead of paying for a second, identical model.predict
# pass; only the (cheap) 0.5 thresholding is repeated here.
predicted_labels = (predictions > 0.5).astype(int).reshape(-1)


In [None]:
# Sanity check: first five ids, first five labels, then both lengths side by side
print(image_ids[:5], predicted_labels[:5], sep="\n")
print(len(image_ids), len(predicted_labels))


In [None]:

# Class index -> readable name.
# NOTE(review): presumably matches the labels inferred from the cats/ and dogs/
# folders — confirm against train_ds.class_names.
label_map = dict(enumerate(("cat", "dog")))


In [None]:
import matplotlib.pyplot as plt
import os

# Preview the first N test images in one row, each titled with its id
# and the predicted class name.
N = 5

fig, axes = plt.subplots(1, N, figsize=(15, 5), squeeze=False)

for i, ax in enumerate(axes[0]):
    img = plt.imread(os.path.join(TEST_DIR, image_files[i]))

    ax.imshow(img)
    ax.axis("off")
    ax.set_title(f"ID: {image_ids[i]}\nPred: {label_map[predicted_labels[i]]}")

plt.show()


In [None]:
# First five raw model outputs (before the 0.5 threshold), flattened to 1-D
print(predictions[:5].reshape(-1))
