In [None]:
#Extracting Dataset Files
import zipfile
import os

# Raw data root, plus the folders the two archives unpack into.
data_path = "../data/raw/"
train_dir = os.path.join(data_path, 'train', 'train')
test_dir = os.path.join(data_path, 'test', 'test1')

# Unpack each archive into its own sub-folder of the raw data root
# (train.zip -> train/, test1.zip -> test/), in that order.
for archive_name, target_name in (('train.zip', 'train'), ('test1.zip', 'test')):
    with zipfile.ZipFile(os.path.join(data_path, archive_name), 'r') as zip_ref:
        zip_ref.extractall(os.path.join(data_path, target_name))

# Report how many entries each extracted folder contains.
train_files = os.listdir(train_dir)
test_files = os.listdir(test_dir)
print(f'Train files: {len(train_files)}')
print(f'Test files: {len(test_files)}')

In [None]:
# Label distribution: the class is read from each training file name,
# so a name containing 'dog' counts as a dog and one containing 'cat' as a cat.
dog_images = list(filter(lambda file_name: 'dog' in file_name, train_files))
cat_images = list(filter(lambda file_name: 'cat' in file_name, train_files))

print(f"Number of dog images: {len(dog_images)}")
print(f"Number of cat images: {len(cat_images)}")

In [None]:
# Checking Image Size
from PIL import Image

# Open one example image and report its dimensions.
image_path = os.path.join(train_dir, dog_images[0])  # Example: a dog image

# Image.open keeps the underlying file open until the image is closed, so use
# a context manager to release the handle as soon as the size has been read.
with Image.open(image_path) as image:
    print(f"Image size: {image.size}")  # (width, height)

In [None]:
# Data Augmentation Preview
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt

# Random augmentation settings used for this preview.
datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Load the sample image inside a context manager so the file handle is
# released. Converting to RGB guarantees a (height, width, 3) array, which is
# what random_transform needs even if the file is grayscale or has alpha.
with Image.open(image_path) as source_image:
    sample_image = source_image.convert('RGB').resize((224, 224))
sample_image_array = np.array(sample_image)

# Apply one random transformation drawn from the settings above.
augmented_image = datagen.random_transform(sample_image_array)

# Display the result. Casting to uint8 keeps the 0-255 pixel range valid for
# imshow even if the transform returned a floating-point array.
plt.imshow(np.clip(augmented_image, 0, 255).astype('uint8'))
plt.title("Augmented Image")
plt.show()

In [None]:
# Image Preprocessing Function
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np
import os

# Load one image from disk and scale it for the network
def load_and_preprocess_image(image_path, target_size=(128, 128)):
    """Load an image, resize it, and return its pixels scaled by 1/255.

    Args:
        image_path: Path of the image file to read.
        target_size: Size passed to ``load_img`` as ``target_size``.

    Returns:
        The array produced by ``img_to_array`` divided by 255.0.
    """
    pixels = img_to_array(load_img(image_path, target_size=target_size))
    return pixels / 255.0  # 0-255 -> 0-1

# Folder holding the extracted training images.
train_folder_path = '../data/raw/train/train'

# os.listdir returns entries in arbitrary order; sort them so the sample below
# is the same five files on every run.
train_files = sorted(os.listdir(train_folder_path))

# Preprocess the first 5 images as a quick sanity check of the function.
example_images = train_files[:5]
processed_images = [
    load_and_preprocess_image(os.path.join(train_folder_path, img))
    for img in example_images
]

# Every shape should be target_size plus the channel dimension.
processed_images_shapes = [img.shape for img in processed_images]
print(f"Processed image shapes: {processed_images_shapes}")

In [None]:
import os
import shutil
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Source folder with the extracted training images, and the destination root
# under which one sub-folder per class is created.
data_path = "../data/raw/train/train"  # Training data path
processed_path = "../data/processed"   # Directory where organized data will be stored

# One directory per class; flow_from_directory derives labels from these names.
cats_dir = os.path.join(processed_path, 'cats')
dogs_dir = os.path.join(processed_path, 'dogs')

# Create directories if they don't exist
os.makedirs(cats_dir, exist_ok=True)
os.makedirs(dogs_dir, exist_ok=True)

# Copy (the originals are left in place) each image into the folder matching
# the class found in its file name. The loop uses its own `source_path`
# variable so it does not overwrite the `image_path` chosen in the image-size
# cell.
for image_name in sorted(os.listdir(data_path)):
    source_path = os.path.join(data_path, image_name)

    if not os.path.isfile(source_path):
        continue

    lowered_name = image_name.lower()
    if 'cat' in lowered_name:
        target_path = os.path.join(cats_dir, image_name)
    elif 'dog' in lowered_name:
        target_path = os.path.join(dogs_dir, image_name)
    else:
        continue  # Skip irrelevant files

    # Re-running the cell should be cheap: skip files that an earlier run
    # already copied completely (same size); recopy partial or changed ones.
    if (os.path.exists(target_path)
            and os.path.getsize(target_path) == os.path.getsize(source_path)):
        continue

    shutil.copy(source_path, target_path)

print("Images successfully organized into cats and dogs directories!")

# Augmentation settings for the training images, gathered in one mapping.
augmentation_options = {
    'rescale': 1./255,  # Normalize pixel values
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}
train_datagen = ImageDataGenerator(**augmentation_options)

# The organized data root (it contains the cats/ and dogs/ class folders).
train_dir = processed_path

# Generator that yields shuffled, augmented batches with binary labels.
flow_options = {
    'target_size': (224, 224),  # Resize all images to 224x224
    'batch_size': 32,
    'class_mode': 'binary',  # Binary classification (dog vs cat)
    'shuffle': True,  # Shuffle data
}
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping

# Updated Data Directories for Cats and Dogs
train_dir = "../data/processed/"
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')

# Fraction of the images held out for validation. Both generators below must
# use the same value so the "training" and "validation" subsets stay
# complementary.
validation_fraction = 0.2

# Training images: rescaled and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=validation_fraction  # 20% validation data
)

# Validation images: rescaled only. Random augmentation here would make
# val_loss change between epochs for reasons unrelated to the model, which
# disturbs the val_loss-based early stopping used during training.
validation_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=validation_fraction
)

# Training Dataset
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=16,  # Reduced for memory optimization
    class_mode='binary',
    subset="training"  # Using training data subset
)

# Validation Dataset (no shuffling needed for evaluation)
validation_generator = validation_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=16,
    class_mode='binary',
    subset="validation",  # Using validation data subset
    shuffle=False
)

# Model Definition: three conv/pool stages followed by a small dense head.
model = Sequential([
    # Convolutional feature extractor (filters: 32 -> 64 -> 128)
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Classifier head
    Flatten(),
    Dense(128, activation='relu'),  # Reduced neurons from 256 to 128
    Dropout(0.5),  # To prevent overfitting

    # Single sigmoid unit for the binary decision
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy with Adam at a small learning rate; track accuracy.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview.
model.summary()

# Stop when val_loss has not improved for 5 epochs and roll back to the best
# weights seen so far.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Make sure the output folder exists BEFORE training, so a missing directory
# cannot cause the save to fail after the whole training run has finished.
model_save_path = '../models/cat_dog_classifier_model.h5'
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)

# Train the Model (one full pass over each generator per epoch)
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=50,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    callbacks=[early_stopping]
)

# Save the Model
tf.keras.models.save_model(model, model_save_path)    # Saving the trained model

print("Model successfully saved!")

In [None]:
import tensorflow as tf

# Reload the classifier from the saved file and print its layer overview.
saved_model_file = '../models/cat_dog_classifier_model.h5'
model = tf.keras.models.load_model(saved_model_file)
model.summary()

In [None]:
import csv
import os

# Create the results directory if it doesn't exist
results_dir = '../results'
os.makedirs(results_dir, exist_ok=True)

# Per-epoch metric lists recorded by model.fit
history_data = history.history

# Saving the model summary to a file.
# The file is opened as UTF-8 explicitly: the summary text may contain
# non-ASCII table-drawing characters, which the platform default encoding
# (for example cp1252 on Windows) cannot always represent.
model_summary_path = os.path.join(results_dir, 'model_summary.txt')
with open(model_summary_path, 'w', encoding='utf-8') as f:
    model.summary(print_fn=lambda x: f.write(x + '\n'))

# Saving training and validation accuracy and loss values to a CSV file,
# one row per epoch (epochs are numbered from 1).
history_path = os.path.join(results_dir, 'training_history.csv')
with open(history_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['epoch', 'train_loss', 'train_accuracy', 'val_loss', 'val_accuracy'])

    # Column order matches the header written above.
    metric_columns = [
        history_data[key]
        for key in ('loss', 'accuracy', 'val_loss', 'val_accuracy')
    ]
    for epoch, metric_row in enumerate(zip(*metric_columns), start=1):
        writer.writerow([epoch, *metric_row])

print("Model summary and training history have been saved successfully!")

In [None]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import load_model
from tqdm import tqdm  # To add a progress bar

# Load the saved model (uses the `load_model` imported in this cell, so the
# cell does not rely on `tf` having been imported by an earlier cell).
model = load_model('../models/cat_dog_classifier_model.h5')

# Test directory path
test_dir = "../data/raw/test/test1"

# Sample submission template (loaded for reference; not used below)
sample_submission_path = "../data/raw/sampleSubmission.csv"
sample_submission = pd.read_csv(sample_submission_path)


def _numeric_stem_key(file_name):
    """Sort key: purely numeric file stems first, in numeric order, then the rest."""
    stem = os.path.splitext(file_name)[0]
    if stem.isdecimal():
        return (0, int(stem), stem)
    return (1, 0, stem)


# List the test files ONCE, in a deterministic order. The same list drives the
# progress bar, the predictions and the ids, so they cannot get out of step.
test_image_names = sorted(
    (name for name in os.listdir(test_dir)
     if os.path.isfile(os.path.join(test_dir, name))),
    key=_numeric_stem_key,
)
total_images = len(test_image_names)

# Ids and predictions are collected side by side, one pair per image.
image_ids = []
predictions = []

# Process test images with a custom progress bar
for idx, image_name in enumerate(tqdm(test_image_names, desc="Processing", total=total_images, unit="image")):
    image_path = os.path.join(test_dir, image_name)

    # Preprocess the image (resize and normalize)
    img = image.load_img(image_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    img_array /= 255.0  # Normalize the image (same as in training)

    # Make prediction (0 for cat, 1 for dog)
    prediction = model.predict(img_array, verbose=0)  # Suppress individual prediction output
    predicted_class = 1 if prediction[0][0] > 0.5 else 0  # 0 is for cat, 1 is for dog

    image_ids.append(os.path.splitext(image_name)[0])
    predictions.append(predicted_class)

# Build the submission table with 'id' and 'label' columns
submission = pd.DataFrame({
    'id': image_ids,
    'label': predictions
})

# Save the result as a CSV file in the submission directory, creating the
# directory first so the write cannot fail because it is missing.
submission_path = "../submission/test_predictions.csv"
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
submission.to_csv(submission_path, index=False)

print(f"Predictions saved to {submission_path}")
