In [1]:
!pip install --upgrade timm torch torchvision matplotlib scikit-learn



In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import timm
from timm.data.mixup import Mixup
from timm.loss import SoftTargetCrossEntropy
import numpy as np
import random
import time
import os
import matplotlib.pyplot as plt
from torch.amp import autocast, GradScaler
from sklearn.metrics import confusion_matrix
import seaborn as sns
import gc

# --- CLEANUP ---
# Collect unreachable Python objects first, then ask PyTorch to release its
# cached CUDA memory, so a re-run of this cell starts from a cleaner state.
gc.collect()
torch.cuda.empty_cache()

# --- CONFIGURATION ---
class Config:
    """Hyperparameters and runtime settings for the CIFAR-10 fine-tuning run."""

    # -- Runtime --
    seed = 42
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # -- Model --
    # First strategy: ViT-Base (ImageNet-21k pretrained).
    # The original author describes this model as a "tank" for CIFAR-10:
    # solid and powerful.
    model_name = 'vit_base_patch16_224.augreg_in21k_ft_in1k'
    img_size = 224
    num_classes = 10

    # -- Schedule (safe settings for a T4 GPU) --
    # Batch size was raised because SAM is not used (faster training).
    batch_size = 64
    # The original author expects to reach about 98% within 7 epochs.
    epochs = 7

    # -- Optimizer --
    # Learning rate the original author considers ideal for AdamW.
    lr = 1e-4
    weight_decay = 0.05

    # -- Mixup & CutMix (discourages memorization, improves the score) --
    mixup_alpha = 0.8
    cutmix_alpha = 1.0
    mixup_prob = 1.0


cfg = Config()

# --- REPRODUCIBILITY ---
def set_seed(seed=42):
    """Seed every random number generator used here and pin cuDNN behavior.

    Args:
        seed: Integer seed passed to Python's ``random``, NumPy, and PyTorch
            (CPU generator and all CUDA devices).
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Disable cuDNN autotuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Seed all RNGs before any dataset, loader, or model object is created,
# then print the selected device and model name.
set_seed(cfg.seed)
print(f"Sistem Hazƒ±r: {cfg.device} | Model: {cfg.model_name}")

# --- DATA PIPELINE ---
# Per-channel normalization statistics shared by the train and test transforms.
mean = (0.5, 0.5, 0.5)
std = (0.5, 0.5, 0.5)

# Arguments common to both preprocessing pipelines.
_image_hw = (cfg.img_size, cfg.img_size)
_bicubic = transforms.InterpolationMode.BICUBIC

# Training: resize to the model input size, apply random augmentation,
# then convert to a tensor and normalize.
transform_train = transforms.Compose(
    [
        transforms.Resize(_image_hw, interpolation=_bicubic),
        transforms.RandomHorizontalFlip(),
        transforms.RandAugment(num_ops=2, magnitude=9),  # automatic data augmentation
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]
)

# Evaluation: the same resize and normalization, with no random augmentation.
transform_test = transforms.Compose(
    [
        transforms.Resize(_image_hw, interpolation=_bicubic),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]
)

# CIFAR-10 splits, downloaded into ./data when not already present.
_cifar_args = dict(root='./data', download=True)
trainset = torchvision.datasets.CIFAR10(train=True, transform=transform_train, **_cifar_args)
testset = torchvision.datasets.CIFAR10(train=False, transform=transform_test, **_cifar_args)

# Only the training loader shuffles; both use the configured batch size.
_loader_args = dict(batch_size=cfg.batch_size, num_workers=2, pin_memory=True)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **_loader_args)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **_loader_args)

# --- MODEL SETUP ---
print("Model Y√ºkleniyor...")
# Build the pretrained backbone with a head sized for cfg.num_classes and
# move it to the configured device.
model = timm.create_model(
    cfg.model_name,
    pretrained=True,
    num_classes=cfg.num_classes,
).to(cfg.device)

# --- OPTIMIZER (AdamW - the safe harbor) ---
# Standard AdamW is used instead of SAM: faster and less error-prone.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=cfg.lr,
    weight_decay=cfg.weight_decay,
)
# Cosine learning-rate schedule spanning the whole run.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=cfg.epochs,
)

# --- MIXUP SETUP ---
# Batch-level Mixup/CutMix settings; the alpha values and the application
# probability come from the config.
_mixup_settings = {
    'mixup_alpha': cfg.mixup_alpha,
    'cutmix_alpha': cfg.cutmix_alpha,
    'prob': cfg.mixup_prob,
    'switch_prob': 0.5,
    'mode': 'batch',
    'label_smoothing': 0.1,
    'num_classes': cfg.num_classes,
}
mixup_fn = Mixup(**_mixup_settings)

# Mixup is in use, so training uses the soft-target cross-entropy loss.
criterion_train = SoftTargetCrossEntropy()
criterion_test = nn.CrossEntropyLoss()
# Gradient scaler for the mixed-precision training loop.
scaler = GradScaler('cuda')

# --- TRAINING LOOP (standard & safe) ---
def train_one_epoch(epoch):
    """Run one mixed-precision training pass over ``trainloader``.

    Each batch is moved to ``cfg.device``, blended by ``mixup_fn``, pushed
    through ``model`` under autocast, and optimized with ``optimizer`` via the
    shared gradient ``scaler``. Progress is printed every 100 steps.

    Args:
        epoch: Index of the current epoch. It is not used inside the function
            and is kept so existing callers continue to work.

    Returns:
        tuple: ``(mean_loss, seconds)`` where ``mean_loss`` is the sum of the
        per-batch losses divided by the number of batches and ``seconds`` is
        the wall-clock duration of the pass.
    """
    model.train()
    running_loss = 0.0
    num_batches = len(trainloader)

    start_time = time.time()

    for i, (inputs, targets) in enumerate(trainloader):
        # The loader is created with pin_memory=True; non_blocking=True lets
        # the host-to-device copies run asynchronously instead of stalling
        # the host on every batch. It has no effect when cfg.device is 'cpu'.
        inputs = inputs.to(cfg.device, non_blocking=True)
        targets = targets.to(cfg.device, non_blocking=True)

        # Apply Mixup/CutMix; the targets come back as soft labels.
        inputs, targets = mixup_fn(inputs, targets)

        optimizer.zero_grad()

        with autocast('cuda'):
            outputs = model(inputs)
            loss = criterion_train(outputs, targets)

        # Scaled backward pass, optimizer step, then scale-factor update.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Read the loss back once per step; each .item() forces a device
        # synchronization, so the value is reused for logging below.
        loss_value = loss.item()
        running_loss += loss_value

        if i % 100 == 0 and i > 0:
            print(f"  Step [{i}/{num_batches}] | Loss: {loss_value:.4f}")

    return running_loss / num_batches, time.time() - start_time

# --- EVALUATION (TTA) ---
@torch.no_grad()
def evaluate_tta():
    """Measure accuracy on ``testloader`` using flip test-time augmentation.

    For every batch the model is run on the inputs and on a copy flipped
    along dim 3 (presumably the width axis of NCHW image batches). The two
    softmax distributions are averaged and the most probable class is taken
    as the prediction.

    Returns:
        float: Percentage (0-100) of test samples predicted correctly.
    """
    model.eval()
    hits = 0
    seen = 0

    print("Test ediliyor (TTA)...")
    for images, labels in testloader:
        images = images.to(cfg.device)
        labels = labels.to(cfg.device)

        with autocast('cuda'):
            # Pass 1: the batch as loaded.
            logits_plain = model(images)
            # Pass 2: the flipped batch.
            logits_flipped = model(torch.flip(images, dims=[3]))
            # Average the two probability distributions.
            prob_plain = torch.softmax(logits_plain, dim=1)
            prob_flipped = torch.softmax(logits_flipped, dim=1)
            probs = (prob_plain + prob_flipped) / 2

        predicted = probs.max(1)[1]
        seen += labels.size(0)
        hits += predicted.eq(labels).sum().item()

    return 100. * hits / seen

# --- MAIN LOOP ---
# Train for cfg.epochs epochs. After every epoch the model is evaluated with
# evaluate_tta(), the LR scheduler is advanced, and the weights are saved
# whenever the accuracy improves on the best value seen so far.
# NOTE(review): evaluate_tta() reads from the test loader, so the checkpoint
# is selected on the test split and best_acc is an optimistic estimate;
# consider a held-out validation split for model selection.
best_acc = 0.0

for epoch in range(cfg.epochs):
    train_loss, duration = train_one_epoch(epoch)
    test_acc = evaluate_tta()
    # The scheduler is stepped once per epoch, after training and evaluation.
    scheduler.step()

    print(f"Epoch [{epoch+1}/{cfg.epochs}] | S√ºre: {duration:.0f}s")
    print(f"Train Loss: {train_loss:.4f}")
    print(f"Test Acc (TTA): {test_acc:.2f}%")

    # Keep only the best-scoring weights; the file is overwritten each time.
    if test_acc > best_acc:
        best_acc = test_acc
        torch.save(model.state_dict(), 'sota_cifar10_best.pth')
        print("üèÜ Yeni En ƒ∞yi Model Kaydedildi!")

# Final summary: report the best accuracy reached during the run.
print(f"\nEƒüitim Tamamlandƒ±. Final SOTA Skoru: {best_acc:.2f}%")

Sistem Hazƒ±r: cuda | Model: vit_base_patch16_224.augreg_in21k_ft_in1k


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 170M/170M [00:03<00:00, 43.2MB/s]


Model Y√ºkleniyor...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

  Step [100/782] | Loss: 1.0393
  Step [200/782] | Loss: 1.5174
  Step [300/782] | Loss: 1.2415
  Step [400/782] | Loss: 0.7669
  Step [500/782] | Loss: 1.6730
  Step [600/782] | Loss: 1.6284
  Step [700/782] | Loss: 1.4346
Test ediliyor (TTA)...
Epoch [1/7] | S√ºre: 395s
Train Loss: 1.3399
Test Acc (TTA): 96.57%
üèÜ Yeni En ƒ∞yi Model Kaydedildi!
  Step [100/782] | Loss: 1.4865
  Step [200/782] | Loss: 1.1772
  Step [300/782] | Loss: 0.5851
  Step [400/782] | Loss: 1.0668
  Step [500/782] | Loss: 1.4792
  Step [600/782] | Loss: 0.9036
  Step [700/782] | Loss: 0.6178
Test ediliyor (TTA)...
Epoch [2/7] | S√ºre: 419s
Train Loss: 1.2075
Test Acc (TTA): 96.82%
üèÜ Yeni En ƒ∞yi Model Kaydedildi!
  Step [100/782] | Loss: 1.1460
  Step [200/782] | Loss: 0.6677
  Step [300/782] | Loss: 1.2151
  Step [400/782] | Loss: 1.2593
  Step [500/782] | Loss: 1.0499
  Step [600/782] | Loss: 1.1506
  Step [700/782] | Loss: 0.9847
Test ediliyor (TTA)...
Epoch [3/7] | S√ºre: 408s
Train Loss: 1.1659
Test A