In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader, random_split
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

# One shared seed is pushed into every random number generator this notebook
# relies on: Python's `random`, NumPy's legacy global RNG, PyTorch's CPU
# generator and the generators of all CUDA devices.
seed = 42
for seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_rng(seed)

# Ask cuDNN for deterministic algorithms and turn off its benchmark mode.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Use the GPU when PyTorch reports one as available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader, random_split
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns



In [None]:
# Reproducibility Config

# Single seed value handed to every RNG below.
seed = 42

# PyTorch generators (CPU, then every CUDA device), followed by NumPy and
# Python's built-in `random` module.
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)

# Deterministic cuDNN algorithms; benchmark mode off.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Pick the compute device by name first, then build the torch.device object.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)


# CIFAR-10 Normalization

# Per-channel mean / std passed to transforms.Normalize in both pipelines.
norm_mean = (0.4914, 0.4822, 0.4465)
norm_std = (0.2023, 0.1994, 0.2010)


def _tensor_and_normalize():
    """Return the ToTensor + Normalize steps that end both pipelines."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ]


# Training pipeline: random 32x32 crop (after 4 pixels of padding) and a
# random horizontal flip, followed by tensor conversion and normalization.
train_tf = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    + _tensor_and_normalize()
)

# Evaluation pipeline: tensor conversion and normalization only.
test_tf = transforms.Compose(_tensor_and_normalize())


In [None]:
# Dataset & Split

data_root = "./data"

# Two views over the same training images: one applies the augmenting
# `train_tf`, the other the deterministic `test_tf`. Validation samples must
# come from the second view; splitting a single augmented dataset would apply
# random crops / flips to the validation images and make validation metrics
# noisy and not comparable to the test metrics.
full_train = torchvision.datasets.CIFAR10(
    root=data_root,
    train=True,
    download=True,
    transform=train_tf
)

full_train_eval = torchvision.datasets.CIFAR10(
    root=data_root,
    train=True,
    download=False,  # files were fetched by the call above
    transform=test_tf
)

test_data = torchvision.datasets.CIFAR10(
    root=data_root,
    train=False,
    download=True,
    transform=test_tf
)

# 80 / 20 train / validation split.
train_len = int(0.8 * len(full_train))
val_len = len(full_train) - train_len

# A dedicated, seeded generator keeps the split identical no matter how much
# of the global RNG stream was consumed before this cell runs.
split_generator = torch.Generator().manual_seed(seed)
shuffled_indices = torch.randperm(len(full_train), generator=split_generator).tolist()

train_data = torch.utils.data.Subset(full_train, shuffled_indices[:train_len])
val_data = torch.utils.data.Subset(full_train_eval, shuffled_indices[train_len:])

train_loader = DataLoader(train_data, batch_size=128, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_data, batch_size=256, shuffle=False, num_workers=2)
test_loader = DataLoader(test_data, batch_size=256, shuffle=False, num_workers=2)

# Human-readable class labels, in label-index order as exposed by the dataset.
class_names = full_train.classes


In [None]:
# SE Block

class SEUnit(nn.Module):
    """Squeeze-and-excitation gate that rescales each channel of a feature map.

    The input is globally average-pooled to one value per channel, passed
    through a two-layer bottleneck MLP (ReLU, then sigmoid), and the resulting
    per-channel scores in (0, 1) multiply the input.

    Args:
        channels: Number of channels of the 4-D input.
        reduction: Divisor for the bottleneck width. The hidden layer has
            ``channels // reduction`` units, clamped to at least 1 so that
            narrow feature maps (``channels < reduction``) still get a usable
            bottleneck instead of a zero-width layer.
    """

    def __init__(self, channels, reduction=16):
        super().__init__()
        # Without the clamp, channels < reduction yields nn.Linear(channels, 0),
        # which makes the gate ignore its input entirely.
        hidden = max(channels // reduction, 1)
        self.fc1 = nn.Linear(channels, hidden)
        self.fc2 = nn.Linear(hidden, channels)

    def forward(self, x):
        """Return ``x`` scaled channel-wise by the learned gate.

        Args:
            x: 4-D tensor; the first two dimensions are read as (batch, channels).

        Returns:
            Tensor with the same shape as ``x``.
        """
        b, c, _, _ = x.size()
        # Squeeze: one average per (sample, channel).
        pooled = F.adaptive_avg_pool2d(x, 1).reshape(b, c)
        # Excite: bottleneck MLP -> sigmoid, reshaped to broadcast over H and W.
        score = torch.sigmoid(self.fc2(F.relu(self.fc1(pooled)))).reshape(b, c, 1, 1)
        return x * score


# Residual + SE Block

class ResSEBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers with an SE gate and a residual shortcut.

    Args:
        in_ch: Channels of the incoming feature map.
        out_ch: Channels produced by the block.
        stride: Stride of the first convolution (and of the shortcut projection
            when one is needed).
    """

    def __init__(self, in_ch, out_ch, stride=1):
        super().__init__()
        # Main branch: conv-bn, conv-bn, then channel re-weighting.
        self.conv1 = nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_ch)
        self.conv2 = nn.Conv2d(out_ch, out_ch, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_ch)
        self.se = SEUnit(out_ch)

        # The skip path is a plain identity unless the main branch changes the
        # spatial size or channel count, in which case a 1x1 conv + batch-norm
        # projects the input to the matching shape.
        shape_preserved = stride == 1 and in_ch == out_ch
        if shape_preserved:
            self.shortcut = nn.Identity()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_ch, out_ch, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_ch),
            )

    def forward(self, x):
        """Apply the gated main branch, add the shortcut, and finish with ReLU."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)
        out = self.bn2(self.conv2(out))
        gated = self.se(out)
        skip = self.shortcut(x)
        return F.relu(gated + skip)

In [None]:
# CIFAR-10 SE-ResNet

class SE_ResNet_CIFAR(nn.Module):
    """SE-ResNet classifier: conv stem, three two-block stages, pooled linear head.

    Args:
        num_classes: Size of the output logit vector.
    """

    @staticmethod
    def _stage(in_ch, out_ch, stride=1):
        """Build one stage: a (possibly strided) block followed by a plain block."""
        return nn.Sequential(
            ResSEBlock(in_ch, out_ch, stride=stride),
            ResSEBlock(out_ch, out_ch),
        )

    def __init__(self, num_classes=10):
        super().__init__()
        # Stem: 3x3 conv (stride 1) lifting the 3 input channels to 64.
        self.stem = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )

        # Stages widen 64 -> 128 -> 256; the last two start with a stride-2 block.
        self.layer1 = self._stage(64, 64)
        self.layer2 = self._stage(64, 128, stride=2)
        self.layer3 = self._stage(128, 256, stride=2)

        # Head: global average pool, dropout (p=0.3), linear classifier.
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(0.3)
        self.fc = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return class logits for a batch of images."""
        features = self.stem(x)
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)
        pooled = self.pool(features).flatten(1)
        return self.fc(self.dropout(pooled))


In [None]:
# Model, Loss, Optimizer, AMP

model = SE_ResNet_CIFAR().to(device)

# Cross-entropy with label smoothing of 0.1.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.AdamW(model.parameters(), lr=3e-4, weight_decay=5e-4)

# T_max equals the number of epochs the training loop runs (180), with one
# scheduler.step() per epoch, so the cosine schedule completes its decay.
# The previous value of 200 stopped training partway down the curve.
# Keep this in sync with the epoch count of the training loop.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=180)

# Gradient scaling is only meaningful for CUDA mixed precision; disabling it
# explicitly on CPU makes the scaler a pass-through and avoids the
# "CUDA is not available" warning.
scaler = torch.cuda.amp.GradScaler(enabled=(device.type == "cuda"))

In [None]:
# Train / Eval Loop

def process_epoch(loader, training=True):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Relies on the notebook-level ``model``, ``criterion``, ``optimizer``,
    ``scaler`` and ``device`` objects.

    Args:
        loader: Iterable yielding ``(inputs, targets)`` batches.
        training: If True the model is put in train mode and the optimizer is
            stepped after every batch. If False the model is put in eval mode
            and the pass runs with autograd disabled, so no graph is built and
            no parameter is touched.

    Returns:
        Tuple ``(loss, acc)``: the sample-weighted mean loss and the fraction
        of samples whose argmax prediction equals the target.
    """
    model.train(training)
    # Mixed precision is only used on CUDA; elsewhere autocast is a no-op.
    use_amp = device.type == "cuda"
    total, correct, loss_val = 0, 0, 0.0

    for inputs, targets in loader:
        inputs, targets = inputs.to(device), targets.to(device)

        if training:
            optimizer.zero_grad()

        # Autograd bookkeeping is needed only when we are going to backprop;
        # skipping it during evaluation saves memory and time.
        with torch.set_grad_enabled(training), torch.autocast(device_type=device.type, enabled=use_amp):
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        if training:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        # Weight the batch-mean loss by batch size so the epoch mean is exact
        # even when the last batch is smaller.
        batch_size = targets.size(0)
        loss_val += loss.item() * batch_size
        correct += (outputs.argmax(1) == targets).sum().item()
        total += batch_size

    return loss_val / total, correct / total


# Per-epoch (loss, accuracy) history for the training and validation passes.
train_logs, val_logs = [], []

NUM_EPOCHS = 180   # epochs are numbered 1..NUM_EPOCHS
PRINT_EVERY = 10   # progress line cadence, in epochs

for ep in range(1, NUM_EPOCHS + 1):
    train_stats = process_epoch(train_loader, training=True)
    val_stats = process_epoch(val_loader, training=False)
    # The learning-rate schedule advances once per epoch.
    scheduler.step()

    train_logs.append(train_stats)
    val_logs.append(val_stats)

    if ep % PRINT_EVERY != 0:
        continue

    tr_acc, va_acc = train_stats[1], val_stats[1]
    print(f"Epoch {ep:03d} | Train Acc: {tr_acc:.4f} | Val Acc: {va_acc:.4f}")

print("\nTraining Finished.\n")

In [None]:
# Test Evaluation

# Collect ground-truth and predicted labels for the whole test set; both lists
# are reused by the confusion-matrix and per-class cells.
y_true, y_pred = [], []

model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        logits = model(images.to(device))
        batch_pred = logits.argmax(1).cpu().numpy()
        y_pred.extend(batch_pred)
        y_true.extend(labels.numpy())

# Overall accuracy: fraction of positions where prediction and label agree.
matches = np.array(y_true) == np.array(y_pred)
test_acc = np.mean(matches)
print("Test Accuracy:", test_acc)


In [None]:
# Accuracy / Loss Curves

# Each log entry is a (loss, accuracy) tuple, so index 1 feeds the left panel
# and index 0 the right one.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

for ax, column, title in ((acc_ax, 1, "Accuracy"), (loss_ax, 0, "Loss")):
    ax.plot([entry[column] for entry in train_logs], label='Train')
    ax.plot([entry[column] for entry in val_logs], label='Val')
    ax.set_title(title)
    ax.legend()

plt.show()


In [None]:

# ------------------------------
# Confusion Matrix
# ------------------------------
# Counts of (true label, predicted label) pairs over the test set, drawn as an
# annotated heatmap with class names on both axes.
cm = confusion_matrix(y_true, y_pred)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title("Confusion Matrix")
plt.show()



In [None]:
# ------------------------------
# Per-Class Accuracy
# ------------------------------
# The class count follows `class_names` instead of a hard-coded 10, and the
# tallies are computed with vectorized bincounts rather than a Python loop.
num_classes = len(class_names)
true_labels = np.asarray(y_true, dtype=np.int64)
pred_labels = np.asarray(y_pred, dtype=np.int64)

# Samples per true class, and correctly predicted samples per true class.
class_total = np.bincount(true_labels, minlength=num_classes).astype(float)
class_correct = np.bincount(
    true_labels[true_labels == pred_labels], minlength=num_classes
).astype(float)

# Classes with no samples are reported as nan instead of triggering a
# divide-by-zero warning.
class_acc = np.divide(
    class_correct,
    class_total,
    out=np.full_like(class_total, np.nan),
    where=class_total > 0,
)

print("\nPer-Class Accuracy:")
for cls, acc in zip(class_names, class_acc):
    print(f"{cls}: {acc:.4f}")