In [3]:
# cluster_fgvc_emnist_mobile.py
# FGVC-style experiment on EMNIST confusion clusters using a pure PyTorch implementation of MobileNetV2

import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Subset
from torchvision.datasets import EMNIST
from torchvision import transforms
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import pandas as pd

# 1. Configuration
# Fixed seed applied to Python's, NumPy's and PyTorch's random generators.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Optimisation hyper-parameters shared by every cluster experiment.
epochs = 20
batch_size = 16
lr = 3e-4

# 2. Data settings
split = 'balanced'  # EMNIST split name handed to the dataset constructor
root = 'data'       # directory where the dataset files are stored/downloaded

# Convert images to tensors, then standardise the single channel.
# NOTE(review): 0.1307 / 0.3081 look like the usual MNIST mean/std -- confirm
# they are the intended statistics for this EMNIST split.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Each inner list is one group of EMNIST label ids; a separate classifier is
# trained per group.
data_clusters = [
    [0, 24],
    [1, 18, 21],
    [5, 28],
    [9, 44],
    [15, 40],
]

# 3. Pure PyTorch MobileNetV2 Implementation

def _make_divisible(v, divisor=8, min_value=None):
    """Round ``v`` to a multiple of ``divisor`` without shrinking it by more than 10%.

    Args:
        v: Value to round (typically a channel count scaled by a width multiplier).
        divisor: The result is a multiple of this number, unless ``min_value``
            wins the lower-bound comparison and is not itself a multiple.
        min_value: Lower bound for the result; defaults to ``divisor``.

    Returns:
        The nearest multiple of ``divisor`` (ties round up), clamped from below
        by ``min_value`` and bumped up by one ``divisor`` step when rounding
        would otherwise leave less than 90% of ``v``.
    """
    lower_bound = divisor if min_value is None else min_value
    # Adding half a divisor before the floor division rounds to the nearest multiple.
    nearest_multiple = (int(v + divisor / 2) // divisor) * divisor
    rounded = max(lower_bound, nearest_multiple)
    # Never let rounding down remove more than 10% of the requested value.
    return rounded + divisor if rounded < 0.9 * v else rounded

class ConvBNReLU(nn.Sequential):
    """Bias-free ``Conv2d`` followed by ``BatchNorm2d`` and an in-place ``ReLU6``.

    Args:
        in_ch: Number of input channels of the convolution.
        out_ch: Number of output channels (also the BatchNorm width).
        kernel_size: Integer kernel size of the convolution.
        stride: Stride of the convolution.
    """

    def __init__(self, in_ch, out_ch, kernel_size, stride):
        # (k - 1) // 2 keeps the spatial size unchanged at stride 1 for odd kernels.
        pad = (kernel_size - 1) // 2
        conv = nn.Conv2d(
            in_ch,
            out_ch,
            kernel_size=kernel_size,
            stride=stride,
            padding=pad,
            bias=False,  # the BatchNorm that follows supplies the shift term
        )
        norm = nn.BatchNorm2d(out_ch)
        activation = nn.ReLU6(inplace=True)
        super().__init__(conv, norm, activation)

class InvertedResidual(nn.Module):
    """MobileNetV2 block: optional 1x1 expansion, 3x3 depthwise conv, 1x1 linear projection.

    Args:
        inp: Number of input channels.
        oup: Number of output channels.
        stride: Stride of the depthwise convolution.
        expand_ratio: Multiplier giving the hidden width ``int(inp * expand_ratio)``;
            when it equals 1 the expansion layer is omitted.
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super().__init__()
        self.stride = stride
        # The skip connection is only valid when input and output shapes match.
        self.use_res_connect = stride == 1 and inp == oup
        hidden_dim = int(inp * expand_ratio)

        stages = []
        if expand_ratio != 1:
            # Pointwise expansion to the hidden width.
            stages.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, stride=1))
        # Depthwise 3x3 convolution: one filter per channel (groups == channels).
        stages.append(nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False))
        stages.append(nn.BatchNorm2d(hidden_dim))
        stages.append(nn.ReLU6(inplace=True))
        # Pointwise projection back to `oup` channels; no activation follows it.
        stages.append(nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False))
        stages.append(nn.BatchNorm2d(oup))
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the block, adding the input back when the residual path is enabled."""
        out = self.conv(x)
        return x + out if self.use_res_connect else out

class MobileNetV2Custom(nn.Module):
    """MobileNetV2-style classifier built from ``ConvBNReLU`` and ``InvertedResidual`` blocks.

    The network is a strided 3x3 stem, a stack of inverted residual blocks, a
    1x1 convolution to ``last_channel`` features, global average pooling and a
    dropout + linear classifier.

    Args:
        num_classes: Number of output logits.
        width_mult: Multiplier applied to every channel count before rounding.
        inverted_residual_setting: Optional list of ``[t, c, n, s]`` rows
            (expansion ratio, output channels, number of repeats, stride of the
            first repeat). Defaults to the table defined below, whose strides
            together with the stem give a total stride of 32.
        round_nearest: Channel counts are rounded to a multiple of this value.
        in_channels: Number of channels of the input images. Defaults to 1,
            which reproduces the previously hard-coded single-channel stem.
    """

    def __init__(self, num_classes=2, width_mult=1.0, inverted_residual_setting=None,
                 round_nearest=8, in_channels=1):
        super().__init__()
        if inverted_residual_setting is None:
            # t, c, n, s
            inverted_residual_setting = [
                [1,  16, 1, 1],
                [6,  24, 2, 2],
                [6,  32, 3, 2],
                [6,  64, 4, 2],
                [6,  96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]

        input_channel = _make_divisible(32 * width_mult, round_nearest)
        # The final feature width is never shrunk below 1280, even for width_mult < 1.
        last_channel = _make_divisible(1280 * max(1.0, width_mult), round_nearest)

        # Collect layers in a local list so `self.features` is only ever bound
        # to an nn.Module (never temporarily to a plain Python list).
        layers = [ConvBNReLU(in_channels, input_channel, kernel_size=3, stride=2)]
        for t, c, n, s in inverted_residual_setting:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for i in range(n):
                # Only the first repeat of each stage applies the stage stride.
                stride = s if i == 0 else 1
                layers.append(InvertedResidual(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel
        layers.append(ConvBNReLU(input_channel, last_channel, kernel_size=1, stride=1))
        self.features = nn.Sequential(*layers)

        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(last_channel, num_classes),
        )

        # Weight initialisation, applied to every sub-module of the network.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return class logits for a batch of images ``x``.

        The feature map is averaged over its last two dimensions (height and
        width for a 4-D convolutional input) before classification.
        """
        x = self.features(x)
        # global average pooling
        x = x.mean([2, 3])
        x = self.classifier(x)
        return x

# 4. Load EMNIST datasets
train_full = EMNIST(root=root, split=split, train=True, download=True, transform=transform)
test_full  = EMNIST(root=root, split=split, train=False, download=True, transform=transform)

# Read the label vectors once. Filtering on them avoids indexing the datasets
# sample by sample (which runs the transform on every image) merely to look at
# the label -- a full pass that would otherwise be repeated for every cluster.
train_labels = np.asarray(train_full.targets)
test_labels = np.asarray(test_full.targets)

# 5. Run experiments per cluster with custom MobileNetV2
results = []
os.makedirs('fgvc_results', exist_ok=True)
for cluster in data_clusters:
    print(f"\n=== MobileNetV2 Custom FGVC Cluster {cluster} ===")

    # Positions of the samples whose label belongs to this cluster.
    train_idx = np.flatnonzero(np.isin(train_labels, cluster)).tolist()
    test_idx = np.flatnonzero(np.isin(test_labels, cluster)).tolist()
    train_ds = Subset(train_full, train_idx)
    test_ds = Subset(test_full, test_idx)
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=2)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=2)

    # Map the original EMNIST label ids to contiguous class indices 0..k-1.
    label_map = {c: i for i, c in enumerate(cluster)}
    model = MobileNetV2Custom(num_classes=len(cluster)).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    for ep in range(1, epochs + 1):
        # --- one training pass over the cluster's training subset ---
        model.train()
        for x, y in train_loader:
            x = x.to(device)
            y_idx = torch.tensor([label_map[int(v)] for v in y], device=device)
            optimizer.zero_grad()
            logits = model(x)
            loss = criterion(logits, y_idx)
            loss.backward()
            optimizer.step()

        # --- evaluation on the cluster's test subset after every epoch ---
        model.eval()
        all_pred, all_true = [], []
        with torch.no_grad():
            for x, y in test_loader:
                x = x.to(device)
                y_idx = [label_map[int(v)] for v in y]
                preds = model(x).argmax(dim=1).cpu().tolist()
                all_pred += preds
                all_true += y_idx
        acc = accuracy_score(all_true, all_pred)
        print(f"Epoch {ep:02d} | Acc {acc:.4f}")

    # Record the accuracy of the final epoch and save its confusion matrix.
    results.append({'cluster': cluster, 'accuracy': acc})
    cm = confusion_matrix(all_true, all_pred)
    disp = ConfusionMatrixDisplay(cm, display_labels=cluster)
    fig, ax = plt.subplots(figsize=(4, 4))
    disp.plot(ax=ax, cmap='Blues', colorbar=False)
    ax.set_title(f"Cluster {cluster} Acc {acc:.3f}")
    fig.savefig(f"fgvc_results/cluster_{''.join(map(str,cluster))}_cm.png")
    plt.close(fig)

# 6. Summary
# pandas is already imported as `pd` at the top of the file.
df = pd.DataFrame(results)
print(df)



=== MobileNetV2 Custom FGVC Cluster [0, 24] ===
Epoch 01 | Acc 0.6188
Epoch 02 | Acc 0.6837
Epoch 03 | Acc 0.6887
Epoch 04 | Acc 0.6875
Epoch 05 | Acc 0.6850
Epoch 06 | Acc 0.7000
Epoch 07 | Acc 0.6963
Epoch 08 | Acc 0.6388
Epoch 09 | Acc 0.6913
Epoch 10 | Acc 0.6850
Epoch 11 | Acc 0.6687
Epoch 12 | Acc 0.6775
Epoch 13 | Acc 0.6937
Epoch 14 | Acc 0.6637
Epoch 15 | Acc 0.6550
Epoch 16 | Acc 0.6800
Epoch 17 | Acc 0.6650
Epoch 18 | Acc 0.6850
Epoch 19 | Acc 0.6512
Epoch 20 | Acc 0.6737

=== MobileNetV2 Custom FGVC Cluster [1, 18, 21] ===
Epoch 01 | Acc 0.4950
Epoch 02 | Acc 0.5317
Epoch 03 | Acc 0.5542
Epoch 04 | Acc 0.5650
Epoch 05 | Acc 0.5808
Epoch 06 | Acc 0.5900
Epoch 07 | Acc 0.5708
Epoch 08 | Acc 0.5758
Epoch 09 | Acc 0.5633
Epoch 10 | Acc 0.5867
Epoch 11 | Acc 0.5925
Epoch 12 | Acc 0.6017
Epoch 13 | Acc 0.6133
Epoch 14 | Acc 0.6150
Epoch 15 | Acc 0.6008
Epoch 16 | Acc 0.6292
Epoch 17 | Acc 0.5817
Epoch 18 | Acc 0.5958
Epoch 19 | Acc 0.6100
Epoch 20 | Acc 0.6000

=== MobileNetV2 C