In [5]:
import platform, time, random

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision
from torchvision import transforms
from torchvision.datasets import CIFAR10

SEED = 1337


def _seed_everything(seed):
    """Seed Python's, NumPy's and PyTorch's global random generators with ``seed``."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


_seed_everything(SEED)

# Ask cuDNN for deterministic kernels and switch off its autotuner so that
# repeated runs pick the same algorithms.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Wall-clock reference point for the notebook's total-time report.
start_time = time.time()

_device_label = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Device: {_device_label} | Seed: {SEED}")

Device: cuda | Seed: 1337


In [3]:
# Architecture label, used only for logging. It gets its own name so that
# `model` is never a plain string: `model` is bound to the network object
# further down in this cell.
MODEL_NAME = 'resnet18'
# Whether the backbone should start from pretrained weights.
PRETRAINED = True
print(f"Using {MODEL_NAME}, pretrained={PRETRAINED}")

from torchvision.models import resnet18

def count_params(model):
    """Count the scalar parameters of ``model``.

    Args:
        model: any object exposing ``parameters()`` that yields tensors with
            ``numel()`` and ``requires_grad``.

    Returns:
        ``(total, trainable)`` where ``trainable`` counts only the parameters
        whose ``requires_grad`` flag is set.
    """
    total = 0
    trainable = 0
    for param in model.parameters():
        size = param.numel()
        total += size
        if param.requires_grad:
            trainable += size
    return total, trainable

# Honour the PRETRAINED switch instead of always loading the default
# checkpoint: passing weights=None builds the network without pretrained weights.
resnet_weights = torchvision.models.ResNet18_Weights.DEFAULT if PRETRAINED else None
model = resnet18(weights=resnet_weights)

total, trainable = count_params(model)
print(f"Total params: {total:,} | Trainable: {trainable:,}")

# Dotted names of every nn.ReLU submodule, in the order named_modules() yields
# them. Later cells use positions in this list to choose which ones to replace.
relu_paths = [
    name
    for name, module in model.named_modules()
    if isinstance(module, nn.ReLU)
]

print(f"Total ReLU before: {len(relu_paths)}")
print("First 9 ReLU paths:", relu_paths[:9])

Using resnet18, pretrained=True
Total params: 11,689,512 | Trainable: 11,689,512
Total ReLU before: 9
First 9 ReLU paths: ['relu', 'layer1.0.relu', 'layer1.1.relu', 'layer2.0.relu', 'layer2.1.relu', 'layer3.0.relu', 'layer3.1.relu', 'layer4.0.relu', 'layer4.1.relu']


In [None]:
# 0-based position in relu_paths of the first activation to replace; every
# entry at this position or later is swapped, earlier ones are left as ReLU.
replace_from = 6
# Activation class (not an instance): it is called once per replaced module so
# each replacement gets its own fresh object.
new_act = nn.SiLU

def _get_child(container, name):
    """Return the child called ``name``, indexing by position for plain sequences."""
    try:
        return getattr(container, name)
    except AttributeError:
        # Plain lists/tuples have no attribute "0"; fall back to indexing,
        # but only when the component really is a position.
        if not name.isdecimal():
            raise
        return container[int(name)]


def set_module(root, path, module_new):
    """Replace the object found at dotted ``path`` under ``root`` with ``module_new``.

    Args:
        root: object to start the lookup from (e.g. the model).
        path: dot-separated name such as ``"layer3.1.relu"``, in the form
            produced by ``named_modules()``. Must be non-empty.
        module_new: replacement object to install at ``path``.

    Raises:
        ValueError: if ``path`` is empty (the root itself cannot be replaced).
        AttributeError / TypeError: if the final assignment fails and the last
            path component is not a numeric index. The original error is
            re-raised rather than being masked by an unrelated ``ValueError``.
    """
    if not path:
        raise ValueError("path must name a submodule; the root itself cannot be replaced")

    *parent_parts, last = path.split('.')
    parent = root
    for part in parent_parts:
        parent = _get_child(parent, part)

    try:
        setattr(parent, last, module_new)
    except (AttributeError, TypeError):
        # Attribute assignment is not supported by this container. Index
        # assignment is only a sensible fallback for numeric components;
        # anything else is a real error and must surface unchanged.
        if not last.isdecimal():
            raise
        parent[int(last)] = module_new

# Swap every ReLU whose 0-based position in relu_paths is >= replace_from for
# a fresh new_act instance, recording 1-based positions for the report below.
# NOTE(review): each path names one module object; if a block's forward()
# calls that module at several points, all of those call sites change
# together — confirm against the block implementation.
replaced = []
for position, relu_path in enumerate(relu_paths, start=1):
    if position <= replace_from:
        continue
    set_module(model, relu_path, new_act())
    replaced.append(position)

# Recount on the live model to confirm the swap took effect.
all_modules = list(model.modules())
relu_count = sum(isinstance(m, nn.ReLU) for m in all_modules)
silu_count = sum(isinstance(m, new_act) for m in all_modules)
print(f"Remaining ReLU: {relu_count} | SiLU added: {silu_count}")
print("Replaced activations (1-based):", replaced)

layer3.1.relu
layer4.0.relu
layer4.1.relu
Remaining ReLU: 6 | SiLU added: 3
Replaced activations (1-based): [7, 8, 9]


In [None]:
# Number of output classes for the new classifier head.
N_CLASSES = 3

# Keep a handle on the original head so its shape can be reported, then
# install a freshly initialised linear layer with the same input width.
old_fc = model.fc
head_in_features = old_fc.in_features
model.fc = nn.Linear(head_in_features, N_CLASSES)
print(f"Old head: {old_fc.in_features}->{old_fc.out_features} | New head: {model.fc.in_features}->{model.fc.out_features}")

Old head: 512->1000 | New head: 512->3


In [None]:
# Classes to keep, in the order that defines their new contiguous labels.
need = ['cat','dog','airplane']
# Full class list; a name's position in it is the original label.
all_class = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']

# original label -> new label, inserted in the order of `need`.
_orig_label_of = {class_name: label for label, class_name in enumerate(all_class)}
orig_to_new = {
    _orig_label_of[class_name]: new_label
    for new_label, class_name in enumerate(need)
    if class_name in _orig_label_of
}

print("Class mapping:", orig_to_new)

# Directory the CIFAR10 files are downloaded to / read from.
root = './data'
# Train split first, then test split; no transform is attached at this stage.
train_full, test_full = (
    CIFAR10(root, train=is_train, download=True) for is_train in (True, False)
)

def _plain_label(label):
    """Unwrap array/tensor scalars (anything with ``.item()``) to a plain Python value."""
    return label.item() if hasattr(label, 'item') else label


def filter_indices(ds, mapping):
    """Return the positions in ``ds`` whose label is a key of ``mapping``.

    Args:
        ds: indexable dataset whose items are ``(sample, label)`` pairs. If it
            also exposes a ``targets`` sequence of raw labels (one per item)
            and has no ``target_transform``, labels are read from there.
        mapping: container of the labels to keep (membership is tested with ``in``).

    Returns:
        List of integer positions, in ascending order.
    """
    labels = getattr(ds, 'targets', None)
    has_target_transform = getattr(ds, 'target_transform', None) is not None
    if labels is not None and not has_target_transform and len(labels) == len(ds):
        # Fast path: reading the label list avoids building every sample
        # (image decoding, transforms) just to look at its label.
        return [i for i, label in enumerate(labels) if _plain_label(label) in mapping]
    # Generic path: fetch each item and inspect the label it actually returns.
    return [i for i in range(len(ds)) if ds[i][1] in mapping]

# Positions of the wanted classes in each split (train first, then test).
train_idx, test_idx = (
    filter_indices(split, orig_to_new) for split in (train_full, test_full)
)
print(f"Train samples: {len(train_idx)} | Val samples: {len(test_idx)}")


Class mapping: {3: 0, 5: 1, 0: 2}
Train samples: 15000 | Val samples: 3000


In [None]:
# Side length (pixels) of the square images fed to the network.
img_size = 64

# Per-channel mean / std used for normalisation in both pipelines.
_NORM_MEAN = [0.485,0.456,0.406]
_NORM_STD = [0.229,0.224,0.225]


def _tensor_and_normalize():
    """Build the tail shared by both pipelines: PIL image -> normalised tensor."""
    return [transforms.ToTensor(), transforms.Normalize(_NORM_MEAN, _NORM_STD)]


# Training: random crop covering 80-100% of the image, resized to img_size,
# plus a random horizontal flip.
train_tf = transforms.Compose([
    transforms.RandomResizedCrop(img_size, scale=(0.8,1.0)),
    transforms.RandomHorizontalFlip(),
    *_tensor_and_normalize(),
])
# Validation: deterministic resize to img_size+8 followed by a centre crop.
val_tf = transforms.Compose([
    transforms.Resize(img_size+8),
    transforms.CenterCrop(img_size),
    *_tensor_and_normalize(),
])

class FilteredCIFAR(Dataset):
    """View over ``base`` restricted to ``idxs``, with labels remapped.

    Args:
        base: indexable dataset whose items are ``(image, label)`` pairs.
        idxs: iterable of positions in ``base`` to expose; copied into a list.
        mapping: lookup from a ``base`` label to the label returned here.
        tf: optional callable applied to the image before it is returned.
    """

    def __init__(self, base, idxs, mapping, tf=None):
        self.base, self.tf = base, tf
        self.idxs = [*idxs]
        self.map = mapping

    def __len__(self):
        return len(self.idxs)

    def __getitem__(self, i):
        image, original_label = self.base[self.idxs[i]]
        # The transform is skipped when ``tf`` is falsy (e.g. None).
        sample = self.tf(image) if self.tf else image
        return sample, self.map[original_label]

train_ds = FilteredCIFAR(train_full, train_idx, orig_to_new, tf=train_tf)
val_ds = FilteredCIFAR(test_full, test_idx, orig_to_new, tf=val_tf)

BATCH_SIZE = 64
# Settings common to both loaders; only the training loader shuffles.
_loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=0)
train_loader = DataLoader(train_ds, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_ds, shuffle=False, **_loader_kwargs)


def _first_batch(loader):
    """Pull one batch from a fresh iterator over ``loader``."""
    return next(iter(loader))


# Sanity check of batch shapes: training loader first, then validation.
x, y = _first_batch(train_loader)
print(x.shape, y.shape)
xv, yv = _first_batch(val_loader)
print(xv.shape, yv.shape)

torch.Size([64, 3, 64, 64]) torch.Size([64])
torch.Size([64, 3, 64, 64]) torch.Size([64])


In [None]:
# Optimisation hyper-parameters.
_LR = 1e-3
_LR_STEP_SIZE = 3   # scheduler period, counted in scheduler.step() calls
_LR_GAMMA = 0.5     # multiplicative decay applied every period
epochs = 5

_use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if _use_cuda else 'cpu')

# Move the model before building the optimizer so the optimizer holds the
# parameters that live on `device`.
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=_LR)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=_LR_STEP_SIZE, gamma=_LR_GAMMA)

def train_epoch(model, loader, crit, opt, dev):
    """Run one optimisation pass over ``loader``.

    Args:
        model: network to train; switched to training mode.
        loader: iterable of ``(inputs, targets)`` batches.
        crit: loss callable taking ``(outputs, targets)``.
        opt: optimizer stepped once per batch.
        dev: device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy_percent)`` over all samples seen, where each
        batch's loss is weighted by its size and accuracy uses the outputs
        computed before that batch's optimizer step.
    """
    model.train()
    running_loss = 0
    n_correct = 0
    n_seen = 0
    for inputs, targets in loader:
        inputs = inputs.to(dev)
        targets = targets.to(dev)
        batch_size = inputs.size(0)

        opt.zero_grad()
        logits = model(inputs)
        batch_loss = crit(logits, targets)
        batch_loss.backward()
        opt.step()

        # Weight by batch size so a smaller final batch does not skew the mean.
        running_loss += batch_loss.item() * batch_size
        n_correct += logits.argmax(1).eq(targets).sum().item()
        n_seen += batch_size
    mean_loss = running_loss / n_seen
    accuracy = 100 * n_correct / n_seen
    return mean_loss, accuracy


In [None]:
def eval_model(model, loader, crit, dev):
    """Evaluate ``model`` over ``loader`` without tracking gradients.

    Args:
        model: network to evaluate; switched to eval mode and left in it.
        loader: iterable of ``(inputs, targets)`` batches.
        crit: loss callable taking ``(outputs, targets)``.
        dev: device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy_percent, preds, labels)``. ``preds`` and
        ``labels`` are always 1-D NumPy arrays covering every sample seen.
        If no samples were seen, both metrics are ``nan`` and both arrays are
        empty; previously this case raised ``ZeroDivisionError``.
    """
    model.eval()
    loss_sum, correct, total = 0.0, 0, 0
    pred_chunks, label_chunks = [], []
    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(dev), yb.to(dev)
            out = model(xb)
            batch_size = xb.size(0)
            # Weight by batch size so a smaller final batch does not skew the mean.
            loss_sum += crit(out, yb).item() * batch_size
            preds = out.argmax(1)
            correct += (preds == yb).sum().item()
            total += batch_size
            pred_chunks.append(preds.cpu().numpy())
            label_chunks.append(yb.cpu().numpy())

    if total == 0:
        # Nothing to average over: report "undefined" rather than crashing,
        # and keep the array return type consistent with the normal path.
        return (float('nan'), float('nan'),
                np.empty(0, dtype=np.int64), np.empty(0, dtype=np.int64))

    return (loss_sum / total, 100 * correct / total,
            np.concatenate(pred_chunks), np.concatenate(label_chunks))

# Best validation accuracy seen so far and the epoch it occurred in. Only
# these metrics are tracked here; no checkpoint is written inside the loop.
best_acc = best_epoch = 0
# One (train_loss, train_acc, val_loss, val_acc) tuple per epoch.
history = []
for ep in range(1, epochs + 1):
    train_metrics = train_epoch(model, train_loader, criterion, optimizer, device)
    val_metrics = eval_model(model, val_loader, criterion, device)[:2]
    # Advance the learning-rate schedule once per epoch, after evaluation.
    scheduler.step()

    tr_loss, tr_acc = train_metrics
    val_loss, val_acc = val_metrics
    history.append((tr_loss, tr_acc, val_loss, val_acc))
    print(f"Epoch {ep} | Train {tr_acc:.2f}% | Val {val_acc:.2f}%")

    # Strict comparison: on a tie the earlier epoch is kept.
    if val_acc > best_acc:
        best_acc, best_epoch = val_acc, ep

print("Best val acc:", best_acc, "at epoch", best_epoch)


Epoch 1 | Train 67.77% | Val 70.60%
Epoch 2 | Train 74.28% | Val 76.27%
Epoch 3 | Train 77.04% | Val 79.10%
Epoch 4 | Train 80.97% | Val 80.63%
Epoch 5 | Train 82.61% | Val 83.37%
Best val acc: 83.36666666666666 at epoch 5


In [None]:
# Predictions and true labels for the whole validation set.
preds, labels = eval_model(model, val_loader, criterion, device)[2:]

# cm[t, p] counts samples whose true class is t and predicted class is p.
# np.add.at accumulates repeated (t, p) pairs correctly, unlike `cm[t, p] += 1`
# with array indices.
cm = np.zeros((N_CLASSES, N_CLASSES), dtype=int)
np.add.at(cm, (labels, preds), 1)
print("Confusion matrix:\n",cm)

for c in range(N_CLASSES):
    tp = cm[c, c]
    n_predicted = cm[:, c].sum()   # column total = tp + false positives
    n_actual = cm[c, :].sum()      # row total    = tp + false negatives
    # Each ratio falls back to 0 when its denominator is empty.
    precision = tp / n_predicted if n_predicted > 0 else 0
    recall = tp / n_actual if n_actual > 0 else 0
    pr_sum = precision + recall
    f1 = 2 * precision * recall / pr_sum if pr_sum > 0 else 0
    print(f"Class {c}: P={precision:.3f} R={recall:.3f} F1={f1:.3f}")

Confusion matrix:
 [[782 161  57]
 [201 758  41]
 [ 27  12 961]]
Class 0: P=0.774 R=0.782 F1=0.778
Class 1: P=0.814 R=0.758 F1=0.785
Class 2: P=0.907 R=0.961 F1=0.933


In [13]:
# Wall-clock time since start_time was recorded, truncated to whole seconds
# and split into minutes / seconds for the report.
end_time = time.time()
elapsed = end_time - start_time
_whole_seconds = int(elapsed)
m, s = _whole_seconds // 60, _whole_seconds % 60
print(f"Total time: {m}m {s}s | Seed {SEED}")

# Persist the weights of the model as it stands at this point.
torch.save(model.state_dict(),'model_final.pth')

Total time: 4m 43s | Seed 1337
