In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import OneCycleLR
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from transformers import AutoConfig, CvtForImageClassification
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
from PIL import Image, UnidentifiedImageError
import os
from sklearn.metrics import confusion_matrix, f1_score, classification_report
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter
import seaborn as sns
import random
from collections import Counter
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Pastikan Anda telah menginstal albumentations:
# !pip install albumentations
# !pip install albumentations[pytorch]

# Seed every random number generator used in this script (PyTorch, NumPy,
# Python's `random`, and all CUDA devices when present) for reproducibility.
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 16
NUM_CLASSES = 20
EPOCHS = 100
LEARNING_RATE = 0.00015836211715780283  # A higher learning rate
WEIGHT_DECAY = 0.00002683
IMAGE_SIZE = 224
DATASET_PATH = '/kaggle/input/d/phiard/aksara-jawa/v3/v3'  # Make sure this path is correct

# Model configuration: start from the pretrained CvT-13 config and resize the
# classification head to NUM_CLASSES outputs.
config = AutoConfig.from_pretrained("microsoft/cvt-13")
config.num_labels = NUM_CLASSES
# Intended to reduce dropout.
# NOTE(review): the CvT config documents `drop_rate`, `attention_drop_rate` and
# `drop_path_rate`; `hidden_dropout_prob` may not be read by the model at all -
# confirm against the transformers CvtConfig reference.
config.hidden_dropout_prob = 0.1

# ignore_mismatched_sizes=True allows loading the checkpoint even though its
# classifier shape differs from the NUM_CLASSES-way head configured above; the
# mismatched classifier weights are then newly initialised.
model = CvtForImageClassification.from_pretrained(
    "microsoft/cvt-13", config=config, ignore_mismatched_sizes=True
).to(device)

# Make sure every parameter is trainable (the whole network is fine-tuned).
for param in model.parameters():
    param.requires_grad = True
# Per-channel normalisation statistics shared by both pipelines (the commonly
# used ImageNet mean / standard deviation values).
_NORM_MEAN = (0.485, 0.456, 0.406)
_NORM_STD = (0.229, 0.224, 0.225)

# Training pipeline (Albumentations): random crop/flip/rotation and colour
# perturbations, followed by normalisation and conversion to a tensor.
_train_steps = [
    A.RandomResizedCrop(height=IMAGE_SIZE, width=IMAGE_SIZE, scale=(0.8, 1.0)),
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=15, p=0.5),
    A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, p=0.5),
    A.HueSaturationValue(p=0.5),
    A.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ToTensorV2(),
]
train_transform = A.Compose(_train_steps)

# Validation pipeline (torchvision): deterministic resize + centre crop, no
# augmentation, then the same normalisation as training.
_val_steps = [
    transforms.Resize(IMAGE_SIZE),
    transforms.CenterCrop(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=list(_NORM_MEAN), std=list(_NORM_STD)),
]
val_transform = transforms.Compose(_val_steps)

class CustomImageFolderAlbumentations(Dataset):
    """Image-folder dataset that works with Albumentations or torchvision transforms.

    Expected layout is ``root/<class_name>/.../<image file>``. The class names
    are the sorted sub-directory names of ``root``; each class index is the
    position of its name in that sorted list. Every file found is checked with
    ``Image.verify()`` when the dataset is built, and files that fail the check
    are reported and left out.

    Args:
        root: Directory containing one sub-directory per class.
        transform: Optional transform. An Albumentations ``Compose`` is called
            with ``image=<numpy array>`` and its ``'image'`` entry is returned;
            any other transform is called directly on the RGB PIL image.
    """

    def __init__(self, root, transform=None):
        self.samples = []  # list of (file path, class index) pairs
        self.targets = []  # class index of each entry in `samples`
        self.transform = transform
        self.classes, self.class_to_idx = self._find_classes(root)
        self.num_classes = len(self.classes)

        # `self.classes` is already sorted, so indices are assigned in a
        # deterministic order.
        for target_class in self.classes:
            class_index = self.class_to_idx[target_class]
            target_dir = os.path.join(root, target_class)
            if not os.path.isdir(target_dir):
                continue
            for root_, _, fnames in sorted(os.walk(target_dir, followlinks=True)):
                for fname in sorted(fnames):
                    path = os.path.join(root_, fname)
                    if not self._is_valid_image(path):
                        print(f"Gambar korup dilewati: {path}")
                        continue
                    self.samples.append((path, class_index))
                    self.targets.append(class_index)

    @staticmethod
    def _is_valid_image(path):
        """Return True when Pillow can open ``path`` and ``verify()`` accepts it."""
        try:
            with Image.open(path) as img:
                img.verify()  # integrity check only; pixel data is not decoded
        except (UnidentifiedImageError, OSError, SyntaxError):
            # SyntaxError is included because Pillow's verify() raises it for
            # some broken files (e.g. PNG checksum failures); without it a
            # single corrupt file would abort dataset construction.
            return False
        return True

    def __getitem__(self, index):
        """Return ``(transformed image, class index)`` for sample ``index``."""
        path, target = self.samples[index]
        with Image.open(path) as img:
            # convert() returns a new, fully loaded image, so the file can be
            # closed as soon as the block exits.
            sample = img.convert('RGB')

        if self.transform:
            if isinstance(self.transform, A.core.composition.Compose):
                # Albumentations works on numpy arrays and returns a dict.
                sample = self.transform(image=np.array(sample))['image']
            else:
                # torchvision-style transforms take the PIL image directly.
                sample = self.transform(sample)

        return sample, target

    def __len__(self):
        """Return the number of usable (verified) samples."""
        return len(self.samples)

    def _find_classes(self, dir):
        """Return ``(sorted class names, name -> index mapping)`` for ``dir``.

        The parameter keeps its original name ``dir`` for backward compatibility.
        """
        with os.scandir(dir) as entries:
            classes = sorted(entry.name for entry in entries if entry.is_dir())
        class_to_idx = {name: idx for idx, name in enumerate(classes)}
        return classes, class_to_idx

# Locate the train / validation splits under the dataset root.
train_dir = os.path.join(DATASET_PATH, 'train')
val_dir = os.path.join(DATASET_PATH, 'val')

# One dataset per split, each paired with its own transform pipeline.
train_dataset = CustomImageFolderAlbumentations(train_dir, transform=train_transform)
val_dataset = CustomImageFolderAlbumentations(val_dir, transform=val_transform)

# Loader options common to both splits; only the shuffling differs.
_loader_options = dict(batch_size=BATCH_SIZE, num_workers=0, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)

# Print how many training samples each class index has.
counter = Counter(train_dataset.targets)
print("Distribusi Kelas dalam Training Set:")
for class_index, sample_count in counter.items():
    print(f"Kelas {class_index}: {sample_count} sampel")

# 'balanced' class weights computed from the training labels, moved to the
# training device so they can be handed to the loss.
_train_labels = train_dataset.targets
class_weights = torch.tensor(
    compute_class_weight(
        class_weight='balanced',
        classes=np.unique(_train_labels),
        y=_train_labels,
    ),
    dtype=torch.float,
).to(device)
print(f"Class Weights: {class_weights}")

# Class-weighted cross entropy; label smoothing is explicitly off.
criterion = nn.CrossEntropyLoss(weight=class_weights, label_smoothing=0.0)

# AdamW over every model parameter.
optimizer = optim.AdamW(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)

# One-cycle learning-rate schedule sized for EPOCHS full passes over train_loader.
scheduler = OneCycleLR(
    optimizer,
    max_lr=LEARNING_RATE,
    steps_per_epoch=len(train_loader),
    epochs=EPOCHS,
)

# MixUp implementation
def mixup_data(x, y, alpha=0.2):
    """Blend every sample in a batch with a randomly chosen partner (MixUp).

    Args:
        x: Batch tensor; its first dimension indexes the samples.
        y: Label tensor indexed along its first dimension like ``x``.
        alpha: Concentration of the Beta(alpha, alpha) distribution the mixing
            coefficient is drawn from. ``alpha <= 0`` disables mixing (lam = 1).

    Returns:
        Tuple ``(mixed_x, y_a, y_b, lam)`` where ``mixed_x`` is
        ``lam * x + (1 - lam) * x[perm]``, ``y_a`` is ``y``, ``y_b`` is
        ``y[perm]`` and ``lam`` is the mixing coefficient.
    """
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1
    batch_size = x.size(0)
    # The permutation must live on the same device as the data it indexes.
    # Deciding by torch.cuda.is_available() breaks for CPU tensors on a CUDA
    # machine. Drawing on the CPU first keeps the same random stream as before.
    index = torch.randperm(batch_size).to(x.device)
    mixed_x = lam * x + (1 - lam) * x[index]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Return the lam-weighted blend of the loss against both label sets.

    ``criterion`` is evaluated once on ``(pred, y_a)`` and once on
    ``(pred, y_b)``; the results are combined as
    ``lam * loss_a + (1 - lam) * loss_b``.
    """
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b

# CutMix implementation (optional)
def cutmix_data(x, y, alpha=1.0):
    """Paste a random rectangle from a shuffled copy of the batch into ``x`` (CutMix).

    NOTE: ``x`` is modified in place and the same tensor object is returned.

    Args:
        x: 4-D batch tensor; the last two dimensions are treated as
            height and width.
        y: Label tensor indexed along its first dimension like ``x``.
        alpha: Concentration of the Beta(alpha, alpha) distribution used to
            draw the initial mixing coefficient. ``alpha <= 0`` gives lam = 1.

    Returns:
        Tuple ``(x, y_a, y_b, lam)`` where ``y_a`` is ``y``, ``y_b`` is
        ``y[perm]`` and ``lam`` is the fraction of each image that was NOT
        replaced, recomputed from the actual (clipped) rectangle.
    """
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1
    # Unpacking also enforces that the batch is 4-D.
    batch_size, _, height, width = x.size()
    # Keep the permutation on the data's own device. Deciding by
    # torch.cuda.is_available() breaks for CPU tensors on a CUDA machine.
    index = torch.randperm(batch_size).to(x.device)
    y_a, y_b = y, y[index]

    # Generate a random rectangle.
    bbx1, bby1, bbx2, bby2 = rand_bbox(x.size(), lam)
    # The advanced-indexed right-hand side is a copy, so overwriting x is safe.
    x[:, :, bby1:bby2, bbx1:bbx2] = x[index, :, bby1:bby2, bbx1:bbx2]
    # Adjust lambda to match the pasted pixel ratio exactly. Cast to a
    # built-in float so callers never receive a NumPy scalar.
    lam = float(1 - ((bbx2 - bbx1) * (bby2 - bby1) / (width * height)))
    return x, y_a, y_b, lam

def rand_bbox(size, lam):
    """Draw a random box, clipped to the image, for a batch of shape ``size``.

    ``size[2]`` is used as the height and ``size[3]`` as the width. Before
    clipping, each side of the box is ``sqrt(1 - lam)`` times the matching
    image side, and its centre is drawn uniformly over the image.

    Returns:
        ``(x1, y1, x2, y2)`` box corners clipped to ``[0, width]`` / ``[0, height]``.
    """
    height, width = size[2], size[3]
    side_ratio = np.sqrt(1. - lam)
    half_w = int(width * side_ratio) // 2
    half_h = int(height * side_ratio) // 2

    # Uniformly sampled centre (x is drawn first, then y).
    center_x = np.random.randint(width)
    center_y = np.random.randint(height)

    left = np.clip(center_x - half_w, 0, width)
    top = np.clip(center_y - half_h, 0, height)
    right = np.clip(center_x + half_w, 0, width)
    bottom = np.clip(center_y + half_h, 0, height)

    return left, top, right, bottom

def train_epoch(model, loader, optimizer, criterion, scheduler, use_mixup=True, use_cutmix=False, alpha=0.2, clip_grad=True, max_norm=1.0):
    """Train ``model`` for one pass over ``loader``.

    Each batch is moved to the module-level ``device``. It may be mixed with
    MixUp (checked first) or CutMix, both using ``alpha``. After the backward
    pass gradients are clipped to ``max_norm`` when ``clip_grad`` is set, then
    the optimizer and the scheduler are each stepped once per batch.

    Returns:
        ``(mean loss per batch, accuracy in percent)``. For mixed batches the
        correct count is the lam-weighted blend of matches against both
        label sets.
    """
    model.train()
    running_loss = 0
    correct = 0
    seen = 0
    batch_is_mixed = use_mixup or use_cutmix

    for data, target in loader:
        data = data.to(device)
        target = target.to(device)
        optimizer.zero_grad()

        if not batch_is_mixed:
            outputs = model(data).logits
            loss = criterion(outputs, target)
        else:
            # MixUp takes precedence when both flags are enabled.
            mix_batch = mixup_data if use_mixup else cutmix_data
            data, targets_a, targets_b, lam = mix_batch(data, target, alpha=alpha)
            outputs = model(data).logits
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)

        loss.backward()

        if clip_grad:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)

        optimizer.step()
        scheduler.step()

        running_loss += loss.item()

        predicted = outputs.max(1)[1]
        if batch_is_mixed:
            # Credit each prediction against both label sets, weighted by
            # lam and (1 - lam) respectively.
            hits_a = predicted.eq(targets_a).sum().item()
            hits_b = predicted.eq(targets_b).sum().item()
            correct += (lam * hits_a + (1 - lam) * hits_b)
        else:
            correct += predicted.eq(target).sum().item()
        seen += target.size(0)

    mean_loss = running_loss / len(loader)
    accuracy = 100. * correct / seen
    return mean_loss, accuracy

def validate(model, loader, criterion, use_mixup=False):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Batches are moved to the module-level ``device``. With ``use_mixup`` the
    inputs and the loss are mixed (alpha fixed at 0.2), but accuracy and F1
    are still measured against the original, unmixed targets.

    Returns:
        ``(mean loss per batch, accuracy in percent, weighted F1,
        list of targets, list of predictions)``.
    """
    model.eval()
    running_loss = 0
    hits = 0
    seen = 0
    collected_targets, collected_predictions = [], []

    with torch.no_grad():
        for data, target in loader:
            data = data.to(device)
            target = target.to(device)

            if not use_mixup:
                outputs = model(data).logits
                loss = criterion(outputs, target)
            else:
                data, targets_a, targets_b, lam = mixup_data(data, target, alpha=0.2)
                outputs = model(data).logits
                loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)

            running_loss += loss.item()
            predicted = outputs.max(1)[1]
            hits += predicted.eq(target).sum().item()
            seen += target.size(0)
            collected_targets.extend(target.cpu().numpy())
            collected_predictions.extend(predicted.cpu().numpy())

    f1 = f1_score(collected_targets, collected_predictions, average='weighted')
    mean_loss = running_loss / len(loader)
    accuracy = 100. * hits / seen
    return mean_loss, accuracy, f1, collected_targets, collected_predictions

# Set up the TensorBoard writer.
writer = SummaryWriter()

# Per-epoch metric history (used for plotting after training).
train_losses = []
train_accs = []
val_losses = []
val_accs = []
val_f1_scores = []

# Early-stopping state: stop after `patience` consecutive epochs without a
# new best validation accuracy.
patience = 20
counter = 0  # epochs since the last improvement
best_val_acc = 0

# Training loop. try/finally guarantees the TensorBoard writer is closed (and
# its events flushed) even if an epoch raises.
try:
    for epoch in range(EPOCHS):
        train_loss, train_acc = train_epoch(
            model,
            train_loader,
            optimizer,
            criterion,
            scheduler,
            use_mixup=True,
            use_cutmix=False,  # Set to True to use CutMix (MixUp must then be disabled)
            alpha=0.2,
            clip_grad=True,
            max_norm=1.0
        )
        val_loss, val_acc, val_f1, all_targets, all_predictions = validate(model, val_loader, criterion, use_mixup=False)

        # Record the metrics.
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        val_losses.append(val_loss)
        val_accs.append(val_acc)
        val_f1_scores.append(val_f1)

        # Log to TensorBoard.
        writer.add_scalars('Loss', {'Train': train_loss, 'Val': val_loss}, epoch)
        writer.add_scalars('Accuracy', {'Train': train_acc, 'Val': val_acc}, epoch)
        writer.add_scalar('F1_Score/Val', val_f1, epoch)

        # Report progress BEFORE the early-stopping check so the metrics of
        # the epoch that triggers the stop are printed as well.
        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%, Val F1 Score: {val_f1:.4f}")

        # Save the best model so far (by validation accuracy).
        # NOTE(review): this checkpoint is not reloaded in this section; code
        # that runs after the loop uses the in-memory `model` and the
        # `all_targets` / `all_predictions` of the last epoch that ran.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), "best_cvt_model.pth")
            counter = 0  # reset on improvement
        else:
            counter += 1

        # Early stopping.
        if counter >= patience:
            print(f"Early stopping setelah epoch {epoch+1} karena tidak ada peningkatan akurasi validasi.")
            break
finally:
    writer.close()

# Plot the confusion matrix for a set of validation results
def plot_confusion_matrix(targets, predictions, class_names):
    """Show an annotated heat-map of the confusion matrix.

    Args:
        targets: True labels.
        predictions: Predicted labels, aligned with ``targets``.
        class_names: Tick labels used on both axes of the heat-map.
    """
    matrix = confusion_matrix(targets, predictions)
    heatmap_options = {
        "annot": True,
        "fmt": "d",
        "cmap": "Blues",
        "xticklabels": class_names,
        "yticklabels": class_names,
    }

    plt.figure(figsize=(12, 10))
    sns.heatmap(matrix, **heatmap_options)
    plt.xlabel("Prediksi")
    plt.ylabel("Sebenarnya")
    plt.title("Confusion Matrix")
    plt.show()

# `all_targets` / `all_predictions` come from the most recent validate() call
# in the training loop, i.e. the last epoch that ran.
plot_confusion_matrix(all_targets, all_predictions, train_dataset.classes)

# Show predictions per class
def show_predictions_per_class(model, dataset, num_classes=20, cols=5):
    """Plot one correctly classified sample for each class.

    The dataset is scanned in index order. The first sample of each class that
    the model predicts correctly is kept. Classes with no correct prediction
    get a 'No Prediction' placeholder. Inference runs on the module-level
    ``device`` and titles use the module-level ``train_dataset.classes``.

    Args:
        model: Model whose output exposes ``.logits``.
        dataset: Indexable dataset yielding ``(image tensor, label)`` pairs.
            Images are presumably normalised 3-channel tensors, since they
            are de-normalised with 3-element mean/std below - TODO confirm.
        num_classes: Number of classes, i.e. panels, to show.
        cols: Number of columns in the plot grid.
    """
    model.eval()
    rows = (num_classes + cols - 1) // cols
    # squeeze=False always returns a 2-D array of Axes, so flatten() also
    # works for a 1x1 grid (where subplots would otherwise return one Axes).
    _, axs = plt.subplots(rows, cols, figsize=(15, 3 * rows), squeeze=False)
    axs = axs.flatten()

    class_images = {i: None for i in range(num_classes)}

    with torch.no_grad():
        for i in range(len(dataset)):
            image, label = dataset[i]
            if class_images[label] is not None:
                # Already have an example for this class: skip the forward pass.
                continue
            output = model(image.unsqueeze(0).to(device)).logits
            predicted = output.max(1)[1].item()
            if predicted == label:
                # Keep the original un-batched image on the CPU for plotting.
                class_images[label] = (image.cpu(), label, predicted)
                if all(value is not None for value in class_images.values()):
                    break

    # Statistics used to undo the input normalisation before display.
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)

    for i in range(num_classes):
        ax = axs[i]
        if class_images[i] is not None:
            img, true_label, pred_label = class_images[i]
            # De-normalise and move channels last for imshow.
            img = img * std + mean
            img = img.permute(1, 2, 0).numpy()
            img = np.clip(img, 0, 1)
            ax.imshow(img)
            ax.set_title(f"True: {train_dataset.classes[true_label]}\nPred: {train_dataset.classes[pred_label]}")
            ax.axis('off')
        else:
            ax.text(0.5, 0.5, 'No Prediction', ha='center', va='center')
            ax.axis('off')

    # Hide any unused panels at the end of the grid.
    for j in range(num_classes, len(axs)):
        axs[j].axis('off')

    plt.tight_layout()
    plt.show()

# Show one correctly predicted validation image per class.
show_predictions_per_class(model, val_dataset)

# Loss and accuracy curves, side by side in one figure.
plt.figure(figsize=(12, 5))

# One row per panel:
# (subplot position, train series, validation series,
#  train legend label, validation legend label, y-axis label, title)
_curve_panels = [
    (1, train_losses, val_losses,
     'Train Loss', 'Validation Loss', 'Loss', 'Training and Validation Loss'),
    (2, train_accs, val_accs,
     'Train Accuracy', 'Validation Accuracy', 'Accuracy (%)', 'Training and Validation Accuracy'),
]

for _pos, _train_series, _val_series, _train_label, _val_label, _ylabel, _title in _curve_panels:
    plt.subplot(1, 2, _pos)
    # Epochs are numbered from 1 on the x-axis.
    plt.plot(range(1, len(_train_series) + 1), _train_series, label=_train_label)
    plt.plot(range(1, len(_val_series) + 1), _val_series, label=_val_label)
    plt.xlabel('Epoch')
    plt.ylabel(_ylabel)
    plt.title(_title)
    plt.legend()

plt.tight_layout()
plt.show()

# Additional evaluation: classification report
def classification_metrics(loader):
    """Print a classification report for all samples served by ``loader``.

    Relies on the module-level ``model`` and ``device`` for inference and on
    ``train_dataset.classes`` for the class names shown in the report.
    """
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for data, target in loader:
            data = data.to(device)
            target = target.to(device)
            logits = model(data).logits
            predicted = torch.max(logits, 1)[1]
            y_true.extend(target.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    report = classification_report(y_true, y_pred, target_names=train_dataset.classes)
    print(report)

# Report on the training loader.
# NOTE(review): train_loader serves train_dataset, whose transform applies
# random augmentations, so this report is computed on augmented samples.
print("Classification Report untuk Training Set:")
classification_metrics(train_loader)

# Report on the validation loader (its transform has no augmentation).
print("Classification Report untuk Validation Set:")
classification_metrics(val_loader)


Some weights of CvtForImageClassification were not initialized from the model checkpoint at microsoft/cvt-13 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([20]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 384]) in the checkpoint and torch.Size([20, 384]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Distribusi Kelas dalam Training Set:
Kelas 0: 114 sampel
Kelas 1: 108 sampel
Kelas 2: 108 sampel
Kelas 3: 108 sampel
Kelas 4: 108 sampel
Kelas 5: 102 sampel
Kelas 6: 108 sampel
Kelas 7: 108 sampel
Kelas 8: 108 sampel
Kelas 9: 108 sampel
Kelas 10: 108 sampel
Kelas 11: 102 sampel
Kelas 12: 108 sampel
Kelas 13: 108 sampel
Kelas 14: 108 sampel
Kelas 15: 108 sampel
Kelas 16: 108 sampel
Kelas 17: 108 sampel
Kelas 18: 108 sampel
Kelas 19: 108 sampel
Class Weights: tensor([0.9447, 0.9972, 0.9972, 0.9972, 0.9972, 1.0559, 0.9972, 0.9972, 0.9972,
        0.9972, 0.9972, 1.0559, 0.9972, 0.9972, 0.9972, 0.9972, 0.9972, 0.9972,
        0.9972, 0.9972], device='cuda:0')
Epoch 1/100
Train Loss: 2.9955, Train Acc: 6.38%
Val Loss: 2.9410, Val Acc: 13.54%, Val F1 Score: 0.0937
Epoch 2/100
Train Loss: 2.9438, Train Acc: 10.88%
Val Loss: 2.8638, Val Acc: 19.79%, Val F1 Score: 0.1600
Epoch 3/100
Train Loss: 2.8650, Train Acc: 16.80%
Val Loss: 2.6901, Val Acc: 29.17%, Val F1 Score: 0.2562
Epoch 4/100
Train L