In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [9]:
# Transformasi dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load Fashion MNIST
train_dataset = datasets.FashionMNIST(
    root='datasets/test',
    train=True,
    download=True,
    transform=transform
)
test_dataset = datasets.FashionMNIST(
    root='test',
    train=False,
    download=True,
    transform=transform
)

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to datasets/test/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26.4M/26.4M [00:01<00:00, 18.7MB/s]


Extracting datasets/test/FashionMNIST/raw/train-images-idx3-ubyte.gz to datasets/test/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to datasets/test/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29.5k/29.5k [00:00<00:00, 310kB/s]


Extracting datasets/test/FashionMNIST/raw/train-labels-idx1-ubyte.gz to datasets/test/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to datasets/test/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4.42M/4.42M [00:00<00:00, 5.53MB/s]


Extracting datasets/test/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to datasets/test/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to datasets/test/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5.15k/5.15k [00:00<00:00, 4.80MB/s]


Extracting datasets/test/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to datasets/test/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to test/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26.4M/26.4M [00:01<00:00, 17.7MB/s]


Extracting test/FashionMNIST/raw/train-images-idx3-ubyte.gz to test/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to test/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29.5k/29.5k [00:00<00:00, 311kB/s]


Extracting test/FashionMNIST/raw/train-labels-idx1-ubyte.gz to test/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to test/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4.42M/4.42M [00:00<00:00, 5.63MB/s]


Extracting test/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to test/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to test/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5.15k/5.15k [00:00<00:00, 4.53MB/s]

Extracting test/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to test/FashionMNIST/raw






In [10]:
class CNNModel(nn.Module):
    def __init__(self, kernel_size=3, pooling_type='max'):
        super(CNNModel, self).__init__()

        # Padding to maintain spatial dimensions
        padding = kernel_size // 2

        # Convolutional layers
        self.conv1 = nn.Conv2d(1, 32, kernel_size=kernel_size, padding=padding)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=kernel_size, padding=padding)

        # Pooling layer
        if pooling_type == 'max':
            self.pool = nn.MaxPool2d(2, 2)
        else:  # 'avg'
            self.pool = nn.AvgPool2d(2, 2)

        # Fully connected layers
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

        # Activation functions
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [11]:
class EarlyStopper:
    def __init__(self, patience=5, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0

def train_model(model, train_loader, val_loader, optimizer, criterion, epochs,
                scheduler=None, early_stopper=None, device='cuda'):
    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []

    for epoch in range(epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_acc = 100 * correct / total

        # Validation phase
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss = val_loss / len(val_loader)
        val_acc = 100 * correct / total

        # Update learning rate
        if scheduler:
            scheduler.step(val_loss)

        # Early stopping
        if early_stopper:
            early_stopper(val_loss)
            if early_stopper.early_stop:
                print(f"Early stopping triggered at epoch {epoch+1}")
                break

        # Record metrics
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        print(f'Epoch {epoch+1}/{epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')

    return train_losses, val_losses, train_accs, val_accs

In [12]:
def compare_models(kernel_sizes, pooling_types, epochs_list, optimizers_list):
    # Load and preprocess MNIST dataset
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
    val_dataset = datasets.MNIST('./data', train=False, transform=transform)

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    criterion = nn.CrossEntropyLoss()

    results = {}

    # Compare kernel sizes
    for kernel_size in kernel_sizes:
        model = CNNModel(kernel_size=kernel_size, pooling_type='max').to(device)
        optimizer = optim.Adam(model.parameters())
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3)
        early_stopper = EarlyStopper(patience=5)

        results[f'kernel_{kernel_size}'] = train_model(
            model, train_loader, val_loader, optimizer, criterion, 50,
            scheduler, early_stopper, device
        )

    # Compare pooling types
    for pooling_type in pooling_types:
        model = CNNModel(kernel_size=3, pooling_type=pooling_type).to(device)
        optimizer = optim.Adam(model.parameters())
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3)
        early_stopper = EarlyStopper(patience=5)

        results[f'pooling_{pooling_type}'] = train_model(
            model, train_loader, val_loader, optimizer, criterion, 50,
            scheduler, early_stopper, device
        )

    # Compare epochs
    for epochs in epochs_list:
        model = CNNModel().to(device)
        optimizer = optim.Adam(model.parameters())
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3)
        early_stopper = EarlyStopper(patience=5)

        results[f'epochs_{epochs}'] = train_model(
            model, train_loader, val_loader, optimizer, criterion, epochs,
            scheduler, early_stopper, device
        )

    # Compare optimizers
    for opt_name in optimizers_list:
        model = CNNModel().to(device)

        if opt_name == 'SGD':
            optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
        elif opt_name == 'RMSprop':
            optimizer = optim.RMSprop(model.parameters())
        else:  # Adam
            optimizer = optim.Adam(model.parameters())

        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3)
        early_stopper = EarlyStopper(patience=5)

        results[f'optimizer_{opt_name}'] = train_model(
            model, train_loader, val_loader, optimizer, criterion, 50,
            scheduler, early_stopper, device
        )

    return results

In [13]:
def plot_results(results):
    # Plot comparison results
    fig, axes = plt.subplots(4, 2, figsize=(15, 20))

    # Plot kernel size comparison
    for kernel_size in [3, 5, 7]:
        axes[0, 0].plot(results[f'kernel_{kernel_size}'][2], label=f'{kernel_size}x{kernel_size}')
    axes[0, 0].set_title('Kernel Size Comparison - Accuracy')
    axes[0, 0].set_xlabel('Epoch')
    axes[0, 0].set_ylabel('Accuracy (%)')
    axes[0, 0].legend()

    # Plot pooling comparison
    for pooling_type in ['max', 'avg']:
        axes[1, 0].plot(results[f'pooling_{pooling_type}'][2], label=pooling_type)
    axes[1, 0].set_title('Pooling Type Comparison - Accuracy')
    axes[1, 0].set_xlabel('Epoch')
    axes[1, 0].set_ylabel('Accuracy (%)')
    axes[1, 0].legend()

    # Plot epochs comparison
    for epochs in [5, 50, 100, 250, 350]:
        axes[2, 0].plot(results[f'epochs_{epochs}'][2][:epochs], label=f'{epochs} epochs')
    axes[2, 0].set_title('Epochs Comparison - Accuracy')
    axes[2, 0].set_xlabel('Epoch')
    axes[2, 0].set_ylabel('Accuracy (%)')
    axes[2, 0].legend()

    # Plot optimizer comparison
    for opt_name in ['SGD', 'RMSprop', 'Adam']:
        axes[3, 0].plot(results[f'optimizer_{opt_name}'][2], label=opt_name)
    axes[3, 0].set_title('Optimizer Comparison - Accuracy')
    axes[3, 0].set_xlabel('Epoch')
    axes[3, 0].set_ylabel('Accuracy (%)')
    axes[3, 0].legend()

    # Plot corresponding losses
    for kernel_size in [3, 5, 7]:
        axes[0, 1].plot(results[f'kernel_{kernel_size}'][0], label=f'{kernel_size}x{kernel_size}')
    axes[0, 1].set_title('Kernel Size Comparison - Loss')
    axes[0, 1].set_xlabel('Epoch')
    axes[0, 1].set_ylabel('Loss')
    axes[0, 1].legend()

    for pooling_type in ['max', 'avg']:
        axes[1, 1].plot(results[f'pooling_{pooling_type}'][0], label=pooling_type)
    axes[1, 1].set_title('Pooling Type Comparison - Loss')
    axes[1, 1].set_xlabel('Epoch')
    axes[1, 1].set_ylabel('Loss')
    axes[1, 1].legend()

    for epochs in [5, 50, 100, 250, 350]:
        axes[2, 1].plot(results[f'epochs_{epochs}'][0][:epochs], label=f'{epochs} epochs')
    axes[2, 1].set_title('Epochs Comparison - Loss')
    axes[2, 1].set_xlabel('Epoch')
    axes[2, 1].set_ylabel('Loss')
    axes[2, 1].legend()

    for opt_name in ['SGD', 'RMSprop', 'Adam']:
        axes[3, 1].plot(results[f'optimizer_{opt_name}'][0], label=opt_name)
    axes[3, 1].set_title('Optimizer Comparison - Loss')
    axes[3, 1].set_xlabel('Epoch')
    axes[3, 1].set_ylabel('Loss')
    axes[3, 1].legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Run comparison
kernel_sizes = [3, 5, 7]
pooling_types = ['max', 'avg']
epochs_list = [5, 50, 100, 250, 350]
optimizers_list = ['SGD', 'RMSprop', 'Adam']

results = compare_models(kernel_sizes, pooling_types, epochs_list, optimizers_list)
plot_results(results)

Epoch 1/50:
Train Loss: 0.2167, Train Acc: 93.36%
Val Loss: 0.0468, Val Acc: 98.43%
Epoch 2/50:
Train Loss: 0.0787, Train Acc: 97.68%
Val Loss: 0.0365, Val Acc: 98.85%
Epoch 3/50:
Train Loss: 0.0631, Train Acc: 98.12%
Val Loss: 0.0288, Val Acc: 99.12%
Epoch 4/50:
Train Loss: 0.0495, Train Acc: 98.57%
Val Loss: 0.0235, Val Acc: 99.24%
Epoch 5/50:
Train Loss: 0.0417, Train Acc: 98.72%
Val Loss: 0.0220, Val Acc: 99.25%
