In [1]:
import os
import time
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')  # Non-interactive backend
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset, WeightedRandomSampler
from torchvision import transforms, datasets
import timm

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, accuracy_score
from torch.optim.lr_scheduler import ReduceLROnPlateau




In [2]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DATA_DIR = "/kaggle/input/malimg-original/malimg_paper_dataset_imgs"
BATCH_SIZE = 16  
IMG_SIZE = 256   # Sesuai model SwinV2-256
NUM_CLASSES = 25
EPOCHS = 20
LEARNING_RATE = 1e-4
OUTPUT_DIR = "/kaggle/working/"
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [3]:
temp_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor()
])
temp_dataset = datasets.ImageFolder(root=DATA_DIR, transform=temp_transform)
loader = DataLoader(temp_dataset, batch_size=128, shuffle=False, num_workers=2)

mean = 0.
std = 0.
total_samples = 0
for data, _ in loader:
    batch_samples = data.size(0)
    data = data.view(batch_samples, data.size(1), -1)
    mean += data.mean(2).sum(0)
    std += data.std(2).sum(0)
    total_samples += batch_samples
mean /= total_samples
std /= total_samples

train_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((int(IMG_SIZE * 1.1), int(IMG_SIZE * 1.1))),
    transforms.RandomResizedCrop(size=(IMG_SIZE, IMG_SIZE), scale=(0.95, 1.0), ratio=(0.95, 1.05)),
    transforms.ToTensor(),
    transforms.RandomApply([transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 1.0))], p=0.3),
    transforms.RandomApply([
        transforms.Lambda(lambda x: torch.clamp(x + torch.randn_like(x) * 0.01, 0, 1))
    ], p=0.3),
    transforms.Normalize(mean=mean, std=std)
])

test_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])


In [4]:
raw_dataset = datasets.ImageFolder(root=DATA_DIR)
all_targets = [label for _, label in raw_dataset.samples]
all_indices = list(range(len(raw_dataset)))

split_path = os.path.join(OUTPUT_DIR, "train_test_split.npz")
if os.path.exists(split_path):
    splits = np.load(split_path)
    train_idx, test_idx = splits['train_idx'], splits['test_idx']
else:
    train_idx, test_idx = train_test_split(
        all_indices,
        test_size=0.2,
        stratify=all_targets,
        random_state=42
    )
    np.savez(split_path, train_idx=train_idx, test_idx=test_idx)

train_dataset_raw = datasets.ImageFolder(root=DATA_DIR, transform=train_transform)
test_dataset_raw = datasets.ImageFolder(root=DATA_DIR, transform=test_transform)

train_dataset = Subset(train_dataset_raw, train_idx)
test_dataset = Subset(test_dataset_raw, test_idx)

train_targets = [all_targets[i] for i in train_idx]
class_counts = Counter(train_targets)
weights = [1.0 / class_counts[train_targets[i]] for i in range(len(train_targets))]
sampler = WeightedRandomSampler(weights, len(weights), replacement=True)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

class_names = raw_dataset.classes


In [5]:
model = timm.create_model('swinv2_base_window16_256', pretrained=True, num_classes=NUM_CLASSES)

model.patch_embed.proj = nn.Conv2d(
    in_channels=1,
    out_channels=model.patch_embed.proj.out_channels,
    kernel_size=model.patch_embed.proj.kernel_size,
    stride=model.patch_embed.proj.stride,
    bias=model.patch_embed.proj.bias is not None
)
nn.init.xavier_uniform_(model.patch_embed.proj.weight)
if model.patch_embed.proj.bias is not None:
    nn.init.zeros_(model.patch_embed.proj.bias)

#dropout dihead
model.head = nn.Sequential(
    nn.Dropout(0.3),
    nn.Linear(model.head.in_features, NUM_CLASSES)
)

model.to(DEVICE)

model.safetensors:   0%|          | 0.00/357M [00:00<?, ?B/s]

SwinTransformerV2(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(1, 128, kernel_size=(4, 4), stride=(4, 4))
    (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
  )
  (layers): Sequential(
    (0): SwinTransformerV2Stage(
      (downsample): Identity()
      (blocks): ModuleList(
        (0): SwinTransformerV2Block(
          (attn): WindowAttention(
            (cpb_mlp): Sequential(
              (0): Linear(in_features=2, out_features=512, bias=True)
              (1): ReLU(inplace=True)
              (2): Linear(in_features=512, out_features=4, bias=False)
            )
            (qkv): Linear(in_features=128, out_features=384, bias=False)
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=128, out_features=128, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
            (softmax): Softmax(dim=-1)
          )
          (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          (drop_path1): 

In [6]:
class FocalLoss(nn.Module):
    def __init__(self, gamma=2, reduction='mean'):
        super().__init__()
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        # Pastikan inputs adalah 2D [batch_size, num_classes]
        if inputs.dim() > 2:
            inputs = inputs.view(inputs.size(0), -1)
        
        # Pastikan targets adalah 1D tensor dengan tipe long
        targets = targets.view(-1).long()
        
        # Validasi: pastikan targets dalam range yang valid [0, num_classes-1]
        num_classes = inputs.size(1)
        assert targets.min() >= 0, f"Target min value {targets.min()} is negative"
        assert targets.max() < num_classes, f"Target max value {targets.max()} >= num_classes {num_classes}"
        
        # Hitung cross entropy dengan log softmax
        log_probs = torch.nn.functional.log_softmax(inputs, dim=-1)
        
        # Gunakan cross_entropy internal untuk lebih aman
        ce_loss = torch.nn.functional.nll_loss(log_probs, targets, reduction='none')
        
        # Hitung probability untuk focal weight
        pt = torch.exp(-ce_loss)
        
        # Hitung focal loss
        focal_loss = ((1 - pt) ** self.gamma) * ce_loss

        if self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()
        else:
            return focal_loss

criterion = FocalLoss(gamma=2)
optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=False)



In [7]:
print("=" * 60)
print("VALIDASI DATA")
print("=" * 60)
print(f"Number of classes in dataset: {len(class_names)}")
print(f"Class names: {class_names}")
print(f"NUM_CLASSES parameter: {NUM_CLASSES}")
print(f"Train samples: {len(train_dataset)}")
print(f"Test samples: {len(test_dataset)}")

# Cek range label di training set
print("\nChecking training labels...")
sample_labels = []
for i, (_, label) in enumerate(train_dataset):
    sample_labels.append(label)
    if i >= 100:  # Check first 100 samples
        break

sample_labels = np.array(sample_labels)
print(f"Sample labels min: {sample_labels.min()}, max: {sample_labels.max()}")
print(f"Unique labels in sample: {np.unique(sample_labels)}")

# Verifikasi apakah NUM_CLASSES sesuai dengan jumlah kelas aktual
actual_num_classes = len(class_names)
if NUM_CLASSES != actual_num_classes:
    print(f"\n  WARNING: NUM_CLASSES ({NUM_CLASSES}) != actual classes ({actual_num_classes})")
    print(f"Updating NUM_CLASSES to {actual_num_classes}")
    NUM_CLASSES = actual_num_classes
    
    # Rebuild model head
    model.head = nn.Sequential(
        nn.Dropout(0.3),
        nn.Linear(model.head[1].in_features, NUM_CLASSES)
    )
    model.to(DEVICE)
    
    # Rebuild optimizer
    optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)

print("=" * 60)
print("STARTING TRAINING")
print("=" * 60)

# Test forward pass untuk debug
print("\nTesting forward pass...")
model.eval()
with torch.no_grad():
    test_batch = next(iter(train_loader))
    test_input, test_label = test_batch[0].to(DEVICE), test_batch[1].to(DEVICE)
    test_output = model(test_input)
    print(f"Input shape: {test_input.shape}")
    print(f"Output shape: {test_output.shape}")
    print(f"Label shape: {test_label.shape}")
    print(f"Expected output shape: [{test_input.size(0)}, {NUM_CLASSES}]")
    
    if test_output.dim() > 2:
        print(f"  Output has {test_output.dim()} dimensions, reshaping...")
        test_output = test_output.view(test_output.size(0), -1)
        print(f"Reshaped output: {test_output.shape}")

print("\nForward pass test successful! Starting training...\n")


VALIDASI DATA
Number of classes in dataset: 25
Class names: ['Adialer.C', 'Agent.FYI', 'Allaple.A', 'Allaple.L', 'Alueron.gen!J', 'Autorun.K', 'C2LOP.P', 'C2LOP.gen!g', 'Dialplatform.B', 'Dontovo.A', 'Fakerean', 'Instantaccess', 'Lolyda.AA1', 'Lolyda.AA2', 'Lolyda.AA3', 'Lolyda.AT', 'Malex.gen!J', 'Obfuscator.AD', 'Rbot!gen', 'Skintrim.N', 'Swizzor.gen!E', 'Swizzor.gen!I', 'VB.AT', 'Wintrim.BX', 'Yuner.A']
NUM_CLASSES parameter: 25
Train samples: 7471
Test samples: 1868

Checking training labels...
Sample labels min: 0, max: 24
Unique labels in sample: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 17 20 21 22 23 24]
STARTING TRAINING

Testing forward pass...
Input shape: torch.Size([16, 1, 256, 256])
Output shape: torch.Size([16, 8, 8, 25])
Label shape: torch.Size([16])
Expected output shape: [16, 25]
  Output has 4 dimensions, reshaping...
Reshaped output: torch.Size([16, 1600])

Forward pass test successful! Starting training...



In [8]:
train_losses, train_accuracies = [], []
val_losses, val_accuracies = [], []

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(inputs)
        
        # DEBUG: Print shape di first batch
        if epoch == 0 and total_train == 0:
            print(f"\n[DEBUG] First batch - Output shape: {outputs.shape}")
        
        # CRITICAL FIX: Reshape jika output bukan 2D
        if outputs.dim() > 2:
            outputs = outputs.view(outputs.size(0), -1)
            if epoch == 0 and total_train == 0:
                print(f"[DEBUG] Reshaped to: {outputs.shape}\n")
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()
    
    train_acc = 100 * correct_train / total_train
    train_loss = running_loss / len(train_loader)
    train_losses.append(train_loss)
    train_accuracies.append(train_acc)

    # Validation
    model.eval()
    val_loss = 0.0
    correct_val = 0
    total_val = 0
    
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = model(inputs)
            
            # CRITICAL FIX: Reshape jika output bukan 2D
            if outputs.dim() > 2:
                outputs = outputs.view(outputs.size(0), -1)
            
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()
    
    val_acc = 100 * correct_val / total_val
    val_loss_avg = val_loss / len(test_loader)
    val_losses.append(val_loss_avg)
    val_accuracies.append(val_acc)
    scheduler.step(val_loss_avg)
    
    print(f"Epoch {epoch+1}/{EPOCHS} - Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Loss: {val_loss_avg:.4f}, Val Acc: {val_acc:.2f}%")



[DEBUG] First batch - Output shape: torch.Size([16, 8, 8, 25])
[DEBUG] Reshaped to: torch.Size([16, 1600])

Epoch 1/20 - Train Loss: 0.7482, Train Acc: 75.28%, Val Loss: 0.0394, Val Acc: 90.42%
Epoch 2/20 - Train Loss: 0.0676, Train Acc: 92.56%, Val Loss: 0.0277, Val Acc: 90.58%
Epoch 3/20 - Train Loss: 0.0457, Train Acc: 94.08%, Val Loss: 0.8168, Val Acc: 81.48%
Epoch 4/20 - Train Loss: 0.0465, Train Acc: 94.23%, Val Loss: 0.0181, Val Acc: 99.36%
Epoch 5/20 - Train Loss: 0.0454, Train Acc: 93.98%, Val Loss: 0.0280, Val Acc: 91.06%
Epoch 6/20 - Train Loss: 0.0318, Train Acc: 95.33%, Val Loss: 0.0244, Val Acc: 98.98%
Epoch 7/20 - Train Loss: 0.0279, Train Acc: 96.57%, Val Loss: 0.0180, Val Acc: 99.14%
Epoch 8/20 - Train Loss: 0.0303, Train Acc: 96.05%, Val Loss: 0.0104, Val Acc: 99.25%
Epoch 9/20 - Train Loss: 0.0307, Train Acc: 96.56%, Val Loss: 0.0200, Val Acc: 99.52%
Epoch 10/20 - Train Loss: 0.0186, Train Acc: 97.60%, Val Loss: 0.0080, Val Acc: 99.52%
Epoch 11/20 - Train Loss: 0.02

In [9]:
torch.save(model.state_dict(), os.path.join(OUTPUT_DIR, "swinv2_malimg_256.pth"))

# Evaluasi
model.eval()
all_preds, all_labels, inference_times = [], [], []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        start = time.time()
        outputs = model(inputs)
        
        # Pastikan outputs adalah 2D [batch_size, num_classes]
        if outputs.dim() > 2:
            outputs = outputs.view(outputs.size(0), -1)
        
        end = time.time()
        inference_times.append(end - start)
        
        # Prediksi
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

all_labels = np.array(all_labels)
all_preds = np.array(all_preds)

# Metrik
accuracy = accuracy_score(all_labels, all_preds)
precision, recall, f1, support = precision_recall_fscore_support(
    all_labels, all_preds, average=None, labels=range(NUM_CLASSES), zero_division=0
)
precision_avg = np.mean(precision)
recall_avg = np.mean(recall)
f1_avg = np.mean(f1)

report_df = pd.DataFrame({
    'class': class_names,
    'precision': precision,
    'recall': recall,
    'f1-score': f1,
    'support': support
})
report_df.to_csv(os.path.join(OUTPUT_DIR, "SwinV2_256_per_class_metrics.csv"), index=False)

# Confusion Matrix
cm = confusion_matrix(all_labels, all_preds)
pd.DataFrame(cm, index=class_names, columns=class_names).to_csv(os.path.join(OUTPUT_DIR, "SwinV2_256_confusion_matrix.csv"))
plt.figure(figsize=(14, 12))
sns.heatmap(cm, annot=False, xticklabels=class_names, yticklabels=class_names, cmap='Blues')
plt.title("Confusion Matrix - SwinV2 (256x256)")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "SwinV2_256_confusion_matrix.png"), dpi=150)
plt.close()

plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Val Loss')
plt.legend()
plt.title("Loss Curves")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.subplot(1, 2, 2)
plt.plot(train_accuracies, label='Train Acc')
plt.plot(val_accuracies, label='Val Acc')
plt.legend()
plt.title("Accuracy Curves")
plt.xlabel("Epoch")
plt.ylabel("Accuracy (%)")
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "SwinV2_256_training_curves.png"))
plt.close()

total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
model_size_mb = total_params * 4 / (1024 ** 2)
avg_time_per_image = np.mean(inference_times) / BATCH_SIZE
total_inference_time = sum(inference_times)
throughput = len(test_dataset) / total_inference_time

summary = {
    "Model": "SwinV2 Base (256x256, 1-Channel, Focal Loss)",
    "Accuracy": accuracy,
    "Macro Precision": precision_avg,
    "Macro Recall": recall_avg,
    "Macro F1": f1_avg,
    "Total Params": total_params,
    "Trainable Params": trainable_params,
    "Model Size (MB)": model_size_mb,
    "Avg Inference Time (ms)": avg_time_per_image * 1000,
    "Throughput (img/sec)": throughput,
    "Hardware": str(DEVICE) + (f" ({torch.cuda.get_device_name(0)})" if torch.cuda.is_available() else "")
}
pd.DataFrame([summary]).to_csv(os.path.join(OUTPUT_DIR, "SwinV2_256_summary.csv"), index=False)

print("\n SwinV2 (256x256): semua hasil disimpan di /kaggle/working/")
print(f"Final Accuracy: {accuracy:.4f}")
print(f"Macro F1-Score: {f1_avg:.4f}")


 SwinV2 (256x256): semua hasil disimpan di /kaggle/working/
Final Accuracy: 0.9946
Macro F1-Score: 0.9867
