In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem; the project directory used by
# the following cells lives underneath this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
import torchvision.models as models

import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import json
import zipfile
from datetime import datetime
import os

In [None]:
# Make the project folder the working directory: relative paths such as
# ROOT_DIR and the files written by later cells resolve against it.
PROJECT_DIR = "/content/drive/MyDrive/DogCat"
os.chdir(PROJECT_DIR)

In [None]:
# --- Reproducibility ---
SEED = 42  # seed handed to the PyTorch RNGs

# --- Data ---
ROOT_DIR = Path("./dogs-vs-cats")  # dataset folder, relative to the working directory
IMAGE_SIZE = 224  # images are resized to IMAGE_SIZE x IMAGE_SIZE
BATCH_SIZE = 32

# --- Training ---
NUM_EPOCHS = 2  # epochs per run of the custom CNN experiments
# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Fix PyTorch's random state up front so weight initialisation, shuffling and
# the random augmentations start from a known seed.
torch.manual_seed(SEED)
cuda_ready = torch.cuda.is_available()
if cuda_ready:
    # Seed every visible GPU as well.
    torch.cuda.manual_seed_all(SEED)

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier producing two class logits.

    Architecture: three blocks of Conv(3x3, padding=1) -> BatchNorm -> activation
    -> MaxPool(2x2) -> Dropout2d(0.25) with 32, 64 and 128 channels, followed by
    Linear -> BatchNorm1d -> activation -> Dropout(0.5) -> Linear(512, 2).

    Args:
        activation: one of ``'relu'``, ``'tanh'`` or ``'leaky_relu'``
            (LeakyReLU with negative slope 0.1). Any other value raises
            ``KeyError`` from the lookup table.
        init_type: ``'xavier'`` or ``'kaiming'`` select the matching uniform
            initialiser; every other value falls back to ``uniform(-0.1, 0.1)``.
        image_size: side length of the (square) input images. The size of the
            first fully connected layer is derived from it. Defaults to 224,
            which reproduces the original ``128 * 28 * 28`` layout.

    Raises:
        ValueError: if ``image_size`` is too small to survive the three pooling
            stages (fewer than 8 pixels per side).
    """

    def __init__(self, activation='relu', init_type='xavier', image_size=224):
        super().__init__()

        # Each of the three 2x2 max-pools halves the spatial size (rounding down).
        feature_side = image_size // 2 // 2 // 2
        if feature_side < 1:
            raise ValueError(
                f"image_size must be at least 8 to survive three 2x2 poolings, got {image_size}"
            )
        self.image_size = image_size
        self.flat_features = 128 * feature_side * feature_side

        # Define activation function (one stateless module shared by all layers)
        self.activation = {
            'relu': nn.ReLU(),
            'tanh': nn.Tanh(),
            'leaky_relu': nn.LeakyReLU(0.1)
        }[activation]

        # First convolutional block
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.dropout1 = nn.Dropout2d(0.25)

        # Second convolutional block
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.dropout2 = nn.Dropout2d(0.25)

        # Third convolutional block
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(2, 2)
        self.dropout3 = nn.Dropout2d(0.25)

        # Fully connected layers
        self.fc1 = nn.Linear(self.flat_features, 512)
        self.fc_bn1 = nn.BatchNorm1d(512)
        self.fc_dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 2)

        # Initialize weights
        self.init_weights(init_type)

    def init_weights(self, init_type):
        """Re-initialise every Conv2d/Linear weight and zero their biases.

        ``'xavier'`` and ``'kaiming'`` use the corresponding uniform initialiser;
        any other ``init_type`` draws weights uniformly from [-0.1, 0.1].
        """
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                if init_type == 'xavier':
                    nn.init.xavier_uniform_(m.weight)
                elif init_type == 'kaiming':
                    nn.init.kaiming_uniform_(m.weight)
                else:  # random
                    nn.init.uniform_(m.weight, -0.1, 0.1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return logits of shape (batch, 2) for a batch of 3-channel images."""
        # First block
        x = self.pool1(self.activation(self.bn1(self.conv1(x))))
        x = self.dropout1(x)

        # Second block
        x = self.pool2(self.activation(self.bn2(self.conv2(x))))
        x = self.dropout2(x)

        # Third block
        x = self.pool3(self.activation(self.bn3(self.conv3(x))))
        x = self.dropout3(x)

        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, features)``, this cannot silently fold extra spatial
        # features into the batch dimension when the input size is wrong;
        # fc1 raises a shape error instead.
        x = torch.flatten(x, 1)

        # Fully connected layers
        x = self.activation(self.fc_bn1(self.fc1(x)))
        x = self.fc_dropout1(x)
        x = self.fc2(x)

        return x

In [None]:
# Per-channel normalisation shared by the training and validation pipelines.
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Training pipeline: resize, random augmentation, tensor conversion, normalisation.
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    normalize,
])

# Validation pipeline: same resize and normalisation but NO random augmentation,
# so validation metrics are deterministic and comparable between epochs and runs.
val_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    normalize,
])

# Extract and organize data if needed
structured_dir = ROOT_DIR / "structured_train"
if not structured_dir.exists():
    with zipfile.ZipFile(ROOT_DIR / "train.zip", 'r') as zip_ref:
        zip_ref.extractall(ROOT_DIR)

    # Organize into class folders (the layout ImageFolder expects)
    for category in ['cats', 'dogs']:
        (structured_dir / category).mkdir(parents=True, exist_ok=True)

    # Files whose name starts with 'cat' go to cats/, everything else to dogs/.
    train_dir = ROOT_DIR / "train"
    for img_path in train_dir.glob("*"):
        category = 'cats' if img_path.name.startswith('cat') else 'dogs'
        img_path.rename(structured_dir / category / img_path.name)


# Load dataset: two views over the same files, differing only in the transform.
dataset = torchvision.datasets.ImageFolder(structured_dir, transform=transform)
val_source = torchvision.datasets.ImageFolder(structured_dir, transform=val_transform)

# Split dataset 80/20. A dedicated seeded generator keeps the split identical
# no matter how much of the global RNG earlier cells have consumed.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(SEED)
train_dataset, val_split = random_split(
    dataset, [train_size, val_size], generator=split_generator
)
# Reuse the held-out indices, but read them through the augmentation-free view.
val_dataset = torch.utils.data.Subset(val_source, val_split.indices)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
def _run_cnn_epoch(model, loader, criterion, optimizer=None):
    """Make one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    With an ``optimizer`` the model is switched to train mode and updated after
    every batch; without one it is switched to eval mode and gradients are
    disabled. The loss is averaged per sample, so a short final batch is not
    over-weighted.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.set_grad_enabled(is_training):
        for images, labels in loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)

            if is_training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if is_training:
                loss.backward()
                optimizer.step()

            batch_count = labels.size(0)
            # The criterion reports the batch mean; scale it back to a sum so the
            # epoch figure is a true per-sample average.
            loss_sum += loss.item() * batch_count
            _, predicted = outputs.max(1)
            total += batch_count
            correct += predicted.eq(labels).sum().item()

    if total == 0:
        raise ValueError("The data loader produced no samples")

    return loss_sum / total, 100. * correct / total


def train_model(model, train_loader, val_loader, optimizer_name='adam', lr=0.001,
                num_epochs=None, checkpoint_path=None):
    """Train ``model`` with cross-entropy loss and track validation performance.

    After every epoch the model is evaluated on ``val_loader``, the
    ReduceLROnPlateau scheduler is stepped on the validation loss, and the
    weights are checkpointed whenever the validation accuracy improves.

    Args:
        model: network to train; it is moved to ``DEVICE``.
        train_loader: iterable of ``(images, labels)`` training batches.
        val_loader: iterable of ``(images, labels)`` validation batches.
        optimizer_name: ``'sgd'`` (momentum 0.9), ``'adam'`` or ``'rmsprop'``.
        lr: initial learning rate.
        num_epochs: number of epochs; defaults to the notebook-level ``NUM_EPOCHS``.
        checkpoint_path: where the best checkpoint is written; defaults to
            ``best_model_<optimizer_name>.pth``. Pass a distinct path per
            experiment to keep runs from overwriting each other's checkpoint.

    Returns:
        ``(history, best_val_acc)`` where ``history`` maps ``'train_loss'``,
        ``'train_acc'``, ``'val_loss'`` and ``'val_acc'`` to per-epoch lists and
        ``best_val_acc`` is the highest validation accuracy (percent) seen.

    Raises:
        ValueError: for an unknown ``optimizer_name`` or an empty data loader.
    """
    if num_epochs is None:
        num_epochs = NUM_EPOCHS
    if checkpoint_path is None:
        checkpoint_path = f'best_model_{optimizer_name}.pth'

    model = model.to(DEVICE)
    criterion = nn.CrossEntropyLoss()

    # Initialize optimizer
    if optimizer_name == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    elif optimizer_name == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=lr)
    elif optimizer_name == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=lr)
    else:
        # A typo used to fall through to RMSprop silently and mislabel the run.
        raise ValueError(f"Unsupported optimizer: {optimizer_name!r}")

    # Learning rate scheduler (driven by the validation loss)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3)

    history = {
        'train_loss': [], 'train_acc': [],
        'val_loss': [], 'val_acc': []
    }

    best_val_acc = 0

    for epoch in range(num_epochs):
        train_loss, train_acc = _run_cnn_epoch(model, train_loader, criterion, optimizer)
        val_loss, val_acc = _run_cnn_epoch(model, val_loader, criterion)

        # Update scheduler
        scheduler.step(val_loss)

        # Save best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_acc': val_acc,
                'epoch': epoch
            }, checkpoint_path)

        # Update history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")
        print("-" * 50)

    return history, best_val_acc

In [None]:
def plot_results(histories, title):
    """Draw loss and accuracy curves for every run, side by side.

    Args:
        histories: mapping of run name -> history dict holding the per-epoch
            lists ``'train_loss'``, ``'val_loss'``, ``'train_acc'`` and ``'val_acc'``.
        title: prefix used for both subplot titles.
    """
    # (train key, validation key, subplot heading, y-axis label) for each panel
    panels = (
        ('train_loss', 'val_loss', 'Loss', 'Loss'),
        ('train_acc', 'val_acc', 'Accuracy', 'Accuracy (%)'),
    )

    plt.figure(figsize=(15, 5))

    for position, (train_key, val_key, heading, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for run_name, run_history in histories.items():
            plt.plot(run_history[train_key], label=f'{run_name}_train')
            plt.plot(run_history[val_key], label=f'{run_name}_val')
        plt.title(f'{title} - {heading}')
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

In [None]:
def fine_tune_model(model_name, train_loader, val_loader, lr=0.0001, num_epochs=10):
    """Fine-tune the classification head of a pre-trained torchvision model.

    The pre-trained network is loaded, all of its parameters are frozen, and its
    final classification layer is replaced by a fresh 2-output linear layer,
    which is the only part that is optimised. After every epoch the model is
    evaluated on ``val_loader`` (when one is given).

    Args:
        model_name: ``'resnet18'``, ``'resnet50'``, ``'resnet101'``, ``'vgg16'``
            or ``'efficientnet_b0'``.
        train_loader: iterable of ``(inputs, targets)`` training batches.
        val_loader: iterable of ``(inputs, targets)`` validation batches, or
            ``None`` to skip validation.
        lr: learning rate for AdamW.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        The fine-tuned model (on ``DEVICE``), with the weights of the last epoch.

    Raises:
        ValueError: for an unsupported ``model_name`` or an empty ``train_loader``.
    """
    supported_models = ('resnet18', 'resnet50', 'resnet101', 'vgg16', 'efficientnet_b0')
    if model_name not in supported_models:
        raise ValueError("Unsupported model")

    # NOTE(review): recent torchvision releases deprecate `pretrained=True` in
    # favour of the `weights=` argument; kept as-is because the installed
    # version is not visible here - TODO confirm and migrate.
    model = getattr(models, model_name)(pretrained=True)

    # Freeze feature extractor layers
    for param in model.parameters():
        param.requires_grad = False

    # Modify the final classification layer (new layers are trainable by default)
    if 'resnet' in model_name:
        model.fc = nn.Linear(model.fc.in_features, 2)
    elif 'vgg' in model_name:
        model.classifier[-1] = nn.Linear(model.classifier[-1].in_features, 2)
    elif 'efficientnet' in model_name:
        model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)

    model = model.to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    # Only hand the optimizer the parameters that can actually receive gradients.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.AdamW(trainable_params, lr=lr)

    best_acc = 0
    for epoch in range(num_epochs):
        # NOTE(review): train mode also lets the frozen backbone's BatchNorm
        # layers update their running statistics - confirm this is intended.
        model.train()
        train_loss, correct, total = 0.0, 0, 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            # Accumulate a per-sample sum so the reported loss is a mean, not a
            # total that grows with the number of batches.
            train_loss += loss.item() * targets.size(0)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

        if total == 0:
            raise ValueError("train_loader produced no samples")

        train_loss /= total
        train_acc = 100. * correct / total
        print(f"Epoch {epoch+1}/{num_epochs} - Loss: {train_loss:.4f}, Acc: {train_acc:.2f}%")

        # Validation phase (previously `val_loader` and `best_acc` were never used)
        if val_loader is not None:
            model.eval()
            val_correct, val_total = 0, 0
            with torch.no_grad():
                for inputs, targets in val_loader:
                    inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
                    _, predicted = model(inputs).max(1)
                    val_total += targets.size(0)
                    val_correct += predicted.eq(targets).sum().item()
            if val_total:
                val_acc = 100. * val_correct / val_total
                best_acc = max(best_acc, val_acc)
                print(f"Epoch {epoch+1}/{num_epochs} - Val Acc: {val_acc:.2f}% (best: {best_acc:.2f}%)")

    return model

In [None]:
# Hyper-parameter grid: every activation is combined with every weight
# initialisation and every optimizer.
activations = [
    'relu',
    'tanh',
    'leaky_relu',
]
initializations = [
    'xavier',
    'kaiming',
    'random',
]
optimizers = [
    'sgd',
    'adam',
    'rmsprop',
]

# Filled in by the experiment loop, keyed by configuration name.
results = dict()
histories = dict()

In [None]:
# Run experiments with custom CNN: one training run per
# (activation, initialization, optimizer) combination.
for act in activations:
    for init in initializations:
        for opt in optimizers:
            print(f"\nInitialization: {init}, Activation: {act}, Optimizer: {opt}")

            # Reseed before every run so each configuration starts from the same
            # RNG state. Without this a run's weight initialisation and shuffling
            # depend on how much randomness the previous runs consumed, which
            # makes single configurations impossible to reproduce on their own.
            torch.manual_seed(SEED)

            model = CNN(activation=act, init_type=init)
            history, acc = train_model(model, train_loader, val_loader, opt)

            # Record the best validation accuracy and the per-epoch curves.
            config_name = f"{act}_{init}_{opt}"
            results[config_name] = acc
            histories[config_name] = history


Initialization: xavier, Activation: relu, Optimizer: sgd
Epoch 1/2
Train Loss: 0.7385, Train Acc: 60.12%
Val Loss: 0.6012, Val Acc: 66.82%
--------------------------------------------------
Epoch 2/2
Train Loss: 0.6203, Train Acc: 65.94%
Val Loss: 0.5714, Val Acc: 69.96%
--------------------------------------------------

Initialization: xavier, Activation: relu, Optimizer: adam
Epoch 1/2
Train Loss: 0.7293, Train Acc: 61.81%
Val Loss: 0.5618, Val Acc: 70.40%
--------------------------------------------------
Epoch 2/2
Train Loss: 0.5923, Train Acc: 68.09%
Val Loss: 0.5208, Val Acc: 73.54%
--------------------------------------------------

Initialization: xavier, Activation: relu, Optimizer: rmsprop
Epoch 1/2
Train Loss: 0.7105, Train Acc: 60.87%
Val Loss: 0.6098, Val Acc: 68.02%
--------------------------------------------------
Epoch 2/2
Train Loss: 0.6109, Train Acc: 66.70%
Val Loss: 0.5444, Val Acc: 72.26%
--------------------------------------------------

Initialization: kaimin

In [None]:
# Persist the best validation accuracy of every configuration as JSON.
with open('experiment_results.json', 'w') as results_file:
    json.dump(results, results_file, indent=4)

# Overlay the training curves of all configurations.
plot_results(histories, 'Model Comparison')

# Report the configuration with the highest accuracy
# (the first one in insertion order wins a tie).
best_name = max(results, key=results.get)
best_config = (best_name, results[best_name])
print(f"\nBest configuration: {best_config[0]} with accuracy {best_config[1]:.2f}%")

In [None]:
# Full candidate list, disabled for now in favour of a single quick run:
# models_to_test = ['resnet18', 'resnet50', 'resnet101', 'vgg16', 'efficientnet_b0']
models_to_test = ['resnet18']

# NOTE: `results` is rebound here. From this cell on it maps model name ->
# fine-tuned model and no longer holds the accuracies of the CNN experiments.
results = {}
for model_name in models_to_test:
    print(f"Training {model_name}...")
    tuned_model = fine_tune_model(model_name, train_loader, val_loader)
    results[model_name] = tuned_model