In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

import augly.image as imaugs

In [4]:
import os
import shutil
import random
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from shutil import copyfile
from sklearn.metrics import precision_score, recall_score, f1_score,accuracy_score

In [5]:
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

# Define paths
data_dir = 'data'
source_dir = os.path.join(data_dir, 'test')  # Assuming Kaggle's test folder is placed here
original_train_dir = os.path.join(data_dir, 'train_original')
augmented_train_dir = os.path.join(data_dir, 'train_augmented')
test_split_dir = os.path.join(data_dir, 'test_split')

# Create directories
os.makedirs(original_train_dir, exist_ok=True)
os.makedirs(augmented_train_dir, exist_ok=True)
os.makedirs(test_split_dir, exist_ok=True)

# Step 1: Split the dataset into train and test
def split_dataset(source_dir, train_dir, test_dir, split_ratio=0.8):
    classes = ['cats', 'dogs']
    for cls in classes:
        os.makedirs(os.path.join(train_dir, cls), exist_ok=True)
        os.makedirs(os.path.join(test_dir, cls), exist_ok=True)
        src_cls_dir = os.path.join(source_dir, cls)
        files = [f for f in os.listdir(src_cls_dir) if os.path.isfile(os.path.join(src_cls_dir, f))]
        random.shuffle(files)
        split_idx = int(len(files) * split_ratio)
        train_files = files[:split_idx]
        test_files = files[split_idx:]
        for f in train_files:
            src = os.path.join(src_cls_dir, f)
            dst = os.path.join(train_dir, cls, f)
            copyfile(src, dst)
        for f in test_files:
            src = os.path.join(src_cls_dir, f)
            dst = os.path.join(test_dir, cls, f)
            copyfile(src, dst)

print("Splitting dataset into train and test...")
split_dataset(source_dir, original_train_dir, test_split_dir, split_ratio=0.8)

# Step 2: Define data augmentation functions
augmentation_functions = [
    imaugs.RandomBlur(min_radius=1.0, max_radius=3.0),
    imaugs.RandomRotation(min_degrees=-30, max_degrees=30),
    imaugs.RandomPixelization(min_ratio=0.1, max_ratio=0.5),
    imaugs.HFlip(p=1.0),
    imaugs.VFlip(p=1.0),
    imaugs.Grayscale(p=1.0),
    imaugs.ColorJitter(brightness_factor=0.5, contrast_factor=0.5, saturation_factor=0.5),
    
    imaugs.RandomBrightness(min_factor=0.1, max_factor=0.5),
    
    imaugs.RandomNoise(),

]

def custom_augment(image_path, output_path):
    image = Image.open(image_path).convert('RGB')
    selected_augs = random.sample(augmentation_functions, 3)
    aug = imaugs.Compose(selected_augs)
    augmented_image = aug(image)
    augmented_image.save(output_path)

# Step 3: Generate augmented images
def generate_augmented_images(train_dir, augmented_train_dir, num_augmented=2):
    classes = ['cats', 'dogs']
    for cls in classes:
        cls_dir = os.path.join(train_dir, cls)
        augmented_cls_dir = os.path.join(augmented_train_dir, cls)
        os.makedirs(augmented_cls_dir, exist_ok=True)
        # Copy original images
        for filename in os.listdir(cls_dir):
            if filename.startswith('.'):
                continue
            src = os.path.join(cls_dir, filename)
            dst = os.path.join(augmented_cls_dir, filename)
            copyfile(src, dst)
        # Generate augmented images
        for filename in os.listdir(cls_dir):
            if filename.startswith('.'):
                continue
            src_path = os.path.join(cls_dir, filename)
            base, ext = os.path.splitext(filename)
            for i in range(num_augmented):
                output_path = os.path.join(augmented_cls_dir, f"{base}_aug{i}{ext}")
                custom_augment(src_path, output_path)

print("Generating augmented images...")
generate_augmented_images(original_train_dir, augmented_train_dir, num_augmented=2)

# Step 4: Plot dataset statistics
def plot_counts(original_dir, augmented_dir):
    classes = ['cats', 'dogs']
    original_counts = {}
    augmented_counts = {}
    for cls in classes:
        original_cls = os.path.join(original_dir, cls)
        augmented_cls = os.path.join(augmented_dir, cls)
        original_counts[cls] = len(os.listdir(original_cls))
        augmented_counts[cls] = len(os.listdir(augmented_cls))
    
    plt.figure(figsize=(10, 6))
    bar_width = 0.35
    index = np.arange(len(classes))
    plt.bar(index, [original_counts[cls] for cls in classes], bar_width, label='Original')
    plt.bar(index + bar_width, [augmented_counts[cls] for cls in classes], bar_width, label='Augmented')
    plt.xlabel('Class')
    plt.ylabel('Number of Images')
    plt.title('Dataset Statistics Before and After Augmentation')
    plt.xticks(index + bar_width/2, classes)
    plt.legend()
    plt.tight_layout()
    plt.savefig('dataset_stats.png')
    plt.show()

print("Plotting dataset statistics...")
plot_counts(original_train_dir, augmented_train_dir)

Splitting dataset into train and test...


FileNotFoundError: [WinError 3] The system cannot find the path specified: 'data\\test\\cats'