# ResNet with CrossEntropy

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import random
import datetime
import os
import copy
import random
import shutil
import time
from tqdm import tqdm
import torchvision
import torchvision.transforms as transforms
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
import torch.backends.cudnn as cudnn
from functools import wraps
from dataclasses import dataclass
import math

In [2]:
!nvidia-smi

Thu Aug 21 02:29:58 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA L4                      Off |   00000000:00:03.0 Off |                    0 |
| N/A   51C    P8             12W /   72W |       0MiB /  23034MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

## Dataset Download

In [3]:
# Root directory under which the TensorBoard logs and model checkpoints of
# this notebook are created (note that the value ends with a slash).
BASE_PATH = "/content/cub-200-2011/"

In [4]:
!wget https://s3.amazonaws.com/fast-ai-imageclas/CUB_200_2011.tgz

--2025-08-21 02:30:12--  https://s3.amazonaws.com/fast-ai-imageclas/CUB_200_2011.tgz
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.46.86, 16.15.219.121, 52.217.75.62, ...
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.46.86|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1150585339 (1.1G) [application/x-tar]
Saving to: ‘CUB_200_2011.tgz’


2025-08-21 02:31:20 (16.5 MB/s) - ‘CUB_200_2011.tgz’ saved [1150585339/1150585339]



In [5]:
!tar -xf /content/CUB_200_2011.tgz -C /content/

## Define seed

In [6]:
def set_seed(seed=200):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer applied to PyTorch, NumPy and the built-in ``random``
            module; its string form is also stored in ``PYTHONHASHSEED``.
    """
    # Python-side sources of randomness.
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generator, plus the cuDNN flags: request deterministic
    # algorithms and switch off the benchmark mode.
    torch.manual_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

## Define ResNet


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision.models as models


class Feature(nn.Module):
    """ImageNet-pretrained torchvision ResNet used as a feature extractor.

    The backbone's own ``fc`` layer is never called: ``forward`` stops after
    the average pooling and returns the flattened activations.

    Args:
        model: Name of a constructor in ``torchvision.models``
            (e.g. ``'resnet18'``).
    """

    def __init__(self, model='resnet18'):
        super().__init__()
        self.model = model

        # Resolve the constructor by name and load the IMAGENET1K_V1 weights.
        build_backbone = models.__dict__[model]
        self.base = build_backbone(weights='IMAGENET1K_V1')

    def forward(self, x):
        """Run ``x`` through the stem, the four stages and the pooling."""
        net = self.base
        pipeline = (
            # stem
            net.conv1, net.bn1, net.relu, net.maxpool,
            # residual stages
            net.layer1, net.layer2, net.layer3, net.layer4,
            # pooling
            net.avgpool,
        )
        for stage in pipeline:
            x = stage(x)

        # Collapse every dimension after the batch dimension.
        return torch.flatten(x, 1)

def resnet18_pretrain(**kwargs):
    """Return a ``Feature`` encoder built on ResNet-18 (``kwargs`` are ignored)."""
    return Feature(model='resnet18')


def resnet34_pretrain(**kwargs):
    """Return a ``Feature`` encoder built on ResNet-34 (``kwargs`` are ignored)."""
    return Feature(model='resnet34')


def resnet50_pretrain(**kwargs):
    """Return a ``Feature`` encoder built on ResNet-50 (``kwargs`` are ignored)."""
    return Feature(model='resnet50')


# Model name -> [encoder factory, size of the feature vector it produces].
model_dict = dict(
    resnet18_pretrain=[resnet18_pretrain, 512],
    resnet34_pretrain=[resnet34_pretrain, 512],
    resnet50_pretrain=[resnet50_pretrain, 2048],
)


class SupCEResNet(nn.Module):
    """Encoder followed by a linear classifier.

    Args:
        name: Key of ``model_dict`` selecting the encoder factory and the
            width of its output features.
        num_classes: Number of logits produced by the classifier.

    Raises:
        KeyError: If ``name`` is not a key of ``model_dict``.
    """

    # The previous default ('resnet50') was not a key of ``model_dict`` and
    # therefore always failed; the default now names an existing entry.
    def __init__(self, name='resnet50_pretrain', num_classes=10):
        super().__init__()
        try:
            model_fun, dim_in = model_dict[name]
        except KeyError:
            raise KeyError(
                f"unknown model {name!r}; expected one of {sorted(model_dict)}"
            ) from None
        self.encoder = model_fun()
        self.fc = nn.Linear(dim_in, num_classes)

    def forward(self, x):
        """Return the class logits for the input batch ``x``."""
        return self.fc(self.encoder(x))


## Define Utils

In [None]:
@dataclass
class Option:
    """Hyper-parameters and run settings for the cross-entropy training run."""

    # --- data / model selection ---
    dataset: str = 'cub-200-2011'        # used in the log/checkpoint directory names
    model: str = 'resnet18_pretrain'     # key into model_dict
    batch_size: int = 128
    num_workers: int = 2                 # DataLoader worker processes

    # --- optimisation schedule ---
    epochs: int = 300
    learning_rate: float = 0.2           # base learning rate for SGD
    lr_decay_epochs: str = '200,240,270' # comma-separated milestones, parsed into ints later
    lr_decay_rate: float = 0.1
    weight_decay: float = 1e-4
    momentum: float = 0.9
    cosine: bool = True                  # cosine schedule instead of step decay
    warm: bool = True                    # linear learning-rate warm-up

    # --- misc ---
    temp: float = 0.1                    # NOTE(review): not referenced in the visible code
    trial: int = 5                       # only appears in the run name
    seed: int = 44                       # passed to set_seed
    n_cls: int = 200                     # number of output classes

opt = Option()

set_seed(opt.seed)

# Run name; suffixes for the schedule options are appended further down.
opt.model_path = f'seed{opt.seed}_{opt.model}_CrossEntropy_epoch_{opt.epochs}_batch_size_{opt.batch_size}_trial_{opt.trial}'

# Output directories. os.path.join avoids the doubled slash that plain string
# formatting produces because BASE_PATH already ends with '/', and
# exist_ok=True replaces the separate "does it exist?" check.
opt.tb_dir = os.path.join(BASE_PATH, 'tensorboards', opt.dataset)
os.makedirs(opt.tb_dir, exist_ok=True)

opt.checkpoint_dir = os.path.join(BASE_PATH, 'checkpoints', opt.dataset)
os.makedirs(opt.checkpoint_dir, exist_ok=True)

# '200,240,270' -> [200, 240, 270]
iterations = opt.lr_decay_epochs.split(',')
opt.lr_decay_epochs = [int(it) for it in iterations]

if opt.cosine:
    opt.model_path = '{}_cosine'.format(opt.model_path)
# warm-up for large-batch training,
if opt.batch_size > 256:
    opt.warm = True
if opt.warm:
    opt.model_path = '{}_warm'.format(opt.model_path)
    opt.warmup_from = 0.01
    opt.warm_epochs = 10
    if opt.cosine:
        # End the warm-up at the value the cosine schedule takes at epoch
        # `warm_epochs`, so both schedules meet at the same learning rate.
        eta_min = opt.learning_rate * (opt.lr_decay_rate ** 3)
        opt.warmup_to = eta_min + (opt.learning_rate - eta_min) * (
                1 + math.cos(math.pi * opt.warm_epochs / opt.epochs)) / 2
    else:
        opt.warmup_to = opt.learning_rate

In [9]:
def adjust_learning_rate(args, optimizer, epoch):
    """Set the learning rate of every parameter group for ``epoch``.

    With ``args.cosine`` the rate follows a half cosine from
    ``args.learning_rate`` down to ``args.learning_rate * lr_decay_rate ** 3``
    over ``args.epochs``. Otherwise it is multiplied by ``args.lr_decay_rate``
    once for every entry of ``args.lr_decay_epochs`` that ``epoch`` exceeds.

    Args:
        args: Options object providing the schedule settings.
        optimizer: Object exposing ``param_groups`` (a list of dicts).
        epoch: Current epoch number.
    """
    base_lr = args.learning_rate

    if args.cosine:
        floor = base_lr * (args.lr_decay_rate ** 3)
        swing = base_lr - floor
        new_lr = floor + swing * (
            1 + math.cos(math.pi * epoch / args.epochs)) / 2
    else:
        milestones = np.asarray(args.lr_decay_epochs)
        passed = np.sum(epoch > milestones)
        new_lr = base_lr * (args.lr_decay_rate ** passed) if passed > 0 else base_lr

    for group in optimizer.param_groups:
        group['lr'] = new_lr


def warmup_learning_rate(args, epoch, batch_id, total_batches, optimizer):
    """Linearly ramp the learning rate during the warm-up epochs.

    Does nothing unless ``args.warm`` is set and ``epoch`` is at most
    ``args.warm_epochs``. Otherwise the rate is interpolated between
    ``args.warmup_from`` and ``args.warmup_to`` according to how many batches
    of the warm-up period have elapsed (epochs are counted from 1).

    Args:
        args: Options object providing the warm-up settings.
        epoch: Current epoch number.
        batch_id: Index of the current batch within the epoch.
        total_batches: Number of batches per epoch.
        optimizer: Object exposing ``param_groups`` (a list of dicts).
    """
    if not args.warm or epoch > args.warm_epochs:
        return

    batches_done = batch_id + (epoch - 1) * total_batches
    progress = batches_done / (args.warm_epochs * total_batches)
    warm_lr = args.warmup_from + progress * (args.warmup_to - args.warmup_from)

    for group in optimizer.param_groups:
        group['lr'] = warm_lr

In [10]:
class AverageMeter(object):
    """Running weighted average that also remembers the latest value.

    Attributes set by ``reset``/``update``: ``val`` (last value), ``sum``
    (weighted total), ``count`` (total weight) and ``avg`` (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [11]:
def accuracy(output, target, topk=(1,)):
    """Return the top-k accuracy, in percent, for each ``k`` in ``topk``.

    Args:
        output: Score tensor; the top-k search runs along dimension 1.
        target: Tensor of target class indices, one per row of ``output``.
        topk: Iterable of the ``k`` values to evaluate.

    Returns:
        A list with one single-element tensor per requested ``k``.
    """
    with torch.no_grad():
        n_samples = target.size(0)

        # Indices of the max(topk) highest scores, transposed so that row r
        # holds the rank-r prediction of every sample.
        _, top_idx = output.topk(max(topk), dim=1, largest=True, sorted=True)
        ranked = top_idx.t()
        hits = ranked.eq(target.view(1, -1).expand_as(ranked))

        return [
            hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(100.0 / n_samples)
            for k in topk
        ]

## Define DataLoader

In [12]:
from pathlib import Path
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class CUBDataset(Dataset):
    """CUB-200-2011 images and labels, read from the dataset's index files.

    The index files ``images.txt``, ``image_class_labels.txt``,
    ``train_test_split.txt`` and (optionally) ``bounding_boxes.txt`` are read
    from ``root``; the images themselves are loaded from ``root/images``.

    Args:
        root: Directory containing the index files and the ``images`` folder.
        split: ``"train"`` selects the images flagged ``1`` in
            ``train_test_split.txt``; any other value selects the rest.
        transform: Optional callable applied to the (possibly cropped) image.
        use_bbox: When true, every image is cropped to its bounding box.

    Raises:
        ValueError: If a non-blank index line has an unexpected field count.
    """

    def __init__(self, root, split="train", transform=None, use_bbox=True):
        self.root      = Path(root)
        self.transform = transform
        self.split     = split
        self.use_bbox  = use_bbox

        # --- (1) parse the index files ---------------------------------------
        # images.txt: <id> <relative path>. Split only once so that a path
        # containing whitespace stays intact.
        self.img_paths = {
            int(img_id): rel
            for img_id, rel in self._read_rows("images.txt", maxsplit=1)
        }   # id -> relative path

        # image_class_labels.txt: <id> <1-based class>; stored 0-based.
        self.labels = {
            int(img_id): int(lbl) - 1
            for img_id, lbl in self._read_rows("image_class_labels.txt")
        }   # id -> 0-based label

        # train_test_split.txt: <id> <flag>, where flag "1" marks training.
        self.is_train = {
            int(img_id): (flag == "1")
            for img_id, flag in self._read_rows("train_test_split.txt")
        }   # id -> bool

        # --- (2) bounding boxes ----------------------------------------------
        # bounding_boxes.txt: <id> <x> <y> <w> <h>, stored as (x1, y1, x2, y2).
        self.bboxes = {}
        if self.use_bbox:
            for row in self._read_rows("bounding_boxes.txt"):
                img_id, x, y, w, h = map(float, row)
                self.bboxes[int(img_id)] = (x, y, x + w, y + h)

        # --- (3) keep only the ids of the requested split --------------------
        want_train = (split == "train")
        self.ids = [i for i in self.img_paths if self.is_train[i] == want_train]

    def _read_rows(self, filename, maxsplit=-1):
        """Return the whitespace-separated fields of every non-blank line."""
        rows = []
        with open(self.root / filename, encoding="utf-8") as f:
            for line in f:
                fields = line.strip().split(None, maxsplit)
                if fields:  # tolerate blank lines, e.g. a trailing newline
                    rows.append(fields)
        return rows

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the ``idx``-th image of the split."""
        img_id   = self.ids[idx]
        img_path = self.root / "images" / self.img_paths[img_id]
        label    = self.labels[img_id]

        img = Image.open(img_path).convert("RGB")

        # --- (4) crop to the bounding box ------------------------------------
        if self.use_bbox:
            x1, y1, x2, y2 = self.bboxes[img_id]
            img = img.crop((x1, y1, x2, y2))

        # --- (5) apply the transform -----------------------------------------
        if self.transform:
            img = self.transform(img)

        return img, label

In [13]:
from torch.utils.data import DataLoader

# Location of the extracted dataset, shared by both splits.
DATA_ROOT = "/content/CUB_200_2011"

# Channel statistics used by both pipelines (the usual ImageNet mean/std that
# torchvision's pretrained weights are trained with).
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# torchvision expects an InterpolationMode rather than a PIL constant.
bilinear = transforms.InterpolationMode.BILINEAR

# Training: random 448x448 crop of the 600x600 image plus a random flip.
transform_train = transforms.Compose([
    transforms.Resize((600, 600), interpolation=bilinear),
    transforms.RandomCrop((448, 448)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

# Evaluation: deterministic centre crop of the same size.
transform = transforms.Compose([
    transforms.Resize((600, 600), interpolation=bilinear),
    transforms.CenterCrop((448, 448)),
    transforms.ToTensor(),
    normalize,
])

train_ds = CUBDataset(DATA_ROOT, split="train", transform=transform_train, use_bbox=False)
test_ds  = CUBDataset(DATA_ROOT, split="test" , transform=transform, use_bbox=False)

# pin_memory=True: the training/validation code copies batches with
# .cuda(non_blocking=True), which can only overlap with compute when the host
# tensors live in pinned memory.
train_loader = DataLoader(train_ds,
                          batch_size=opt.batch_size,
                          shuffle=True,
                          num_workers=opt.num_workers,
                          pin_memory=True)

test_loader = DataLoader(test_ds,
                         batch_size=opt.batch_size,
                         shuffle=False,
                         num_workers=opt.num_workers,
                         pin_memory=True)


In [14]:
from torch.utils.tensorboard import SummaryWriter

# Second-resolution timestamp, used as the last path component so that each
# run logs into its own directory.
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# Layout: <tb_dir>/<run name>/<timestamp>
tb_log_dir = f'{opt.tb_dir}/{opt.model_path}/{current_time}'

# flush_secs=2 asks the writer to flush pending events to disk every 2 seconds.
writer = SummaryWriter(log_dir=tb_log_dir, flush_secs=2)

## Training

In [15]:
# Build the encoder + classifier selected by opt.model, with opt.n_cls output
# classes, and move its parameters to the GPU.
model = SupCEResNet(name=opt.model, num_classes=opt.n_cls)
model.cuda()

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 229MB/s]


SupCEResNet(
  (encoder): Feature(
    (base): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (1): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNor

In [16]:
def train(model, trainloader, optimizer, criterion, opt, epoch, writer):
    """Train ``model`` for one epoch and log the epoch averages.

    Args:
        model: Network to optimise; switched to training mode.
        trainloader: Iterable of ``(images, labels)`` batches with a length.
        optimizer: Optimiser updated once per batch.
        criterion: Loss called as ``criterion(output, labels)``.
        opt: Options object forwarded to ``warmup_learning_rate``.
        epoch: Current epoch number, used as the logging step.
        writer: Object with ``add_scalar``; receives ``train/loss``,
            ``train/accuracy_top1`` and ``train/accuracy_top5``.
    """
    model.train()
    loss_meter = AverageMeter()
    top1_meter = AverageMeter()
    top5_meter = AverageMeter()

    for batch_idx, (images, labels) in enumerate(trainloader):
        # Move the batch to the GPU when one is present.
        if torch.cuda.is_available():
            images = images.cuda(non_blocking=True)
            labels = labels.cuda(non_blocking=True)
        batch_len = labels.shape[0]

        # The warm-up may override the learning rate before this step.
        warmup_learning_rate(opt, epoch, batch_idx, len(trainloader), optimizer)

        # Forward pass.
        output = model(images)
        loss = criterion(output, labels)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate the batch statistics, weighted by the batch size.
        acc1, acc5 = accuracy(output, labels, topk=(1, 5))
        loss_meter.update(loss.item(), batch_len)
        top1_meter.update(acc1[0], batch_len)
        top5_meter.update(acc5[0], batch_len)

    for tag, meter in (('train/loss', loss_meter),
                       ('train/accuracy_top1', top1_meter),
                       ('train/accuracy_top5', top5_meter)):
        writer.add_scalar(tag, meter.avg, epoch)


In [17]:
def validation(model, valloader, criterion, writer, epoch):
    """Evaluate ``model`` on ``valloader`` and log the averages.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        valloader: Iterable of ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(output, labels)``.
        writer: Object with ``add_scalar``; receives ``test/loss``,
            ``test/accuracy_top1`` and ``test/accuracy_top5``.
        epoch: Current epoch number, used as the logging step.

    Returns:
        Tuple ``(top1_average, top5_average)`` over all evaluated samples.
    """
    model.eval()
    loss_meter = AverageMeter()
    top1_meter = AverageMeter()
    top5_meter = AverageMeter()

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in valloader:
            if torch.cuda.is_available():
                images = images.cuda(non_blocking=True)
                labels = labels.cuda(non_blocking=True)
            batch_len = labels.shape[0]

            output = model(images)
            loss = criterion(output, labels)

            # Accumulate the batch statistics, weighted by the batch size.
            acc1, acc5 = accuracy(output, labels, topk=(1, 5))
            loss_meter.update(loss.item(), batch_len)
            top1_meter.update(acc1[0], batch_len)
            top5_meter.update(acc5[0], batch_len)

    for tag, meter in (('test/loss', loss_meter),
                       ('test/accuracy_top1', top1_meter),
                       ('test/accuracy_top5', top5_meter)):
        writer.add_scalar(tag, meter.avg, epoch)
    return top1_meter.avg, top5_meter.avg

In [None]:
train_loss_results = []
train_contrastive_loss_results = []
best_loss = 80
best_accuracy = 0

criterion = nn.CrossEntropyLoss().cuda()

optimizer = torch.optim.SGD(model.parameters(),
                            lr=opt.learning_rate,
                            momentum=opt.momentum,
                            weight_decay=opt.weight_decay)

# Checkpoint locations: the latest epoch, and a copy of the best epoch so far.
last_ckpt = f"{opt.checkpoint_dir}/{opt.model_path}_last.pth.tar"
best_ckpt = f"{opt.checkpoint_dir}/{opt.model_path}_best.pth.tar"

for epoch in tqdm(range(1, opt.epochs + 1)):

    # Per-epoch schedule; the warm-up inside train() may override it per batch.
    adjust_learning_rate(opt, optimizer, epoch)

    train(model, train_loader, optimizer, criterion, opt, epoch, writer)

    top1, top5 = validation(model, test_loader, criterion, writer, epoch)

    # validation() averages tensors; keep a plain float so the checkpoint does
    # not embed a device tensor.
    top1 = float(top1)

    # Update the best score BEFORE building the checkpoint, otherwise the
    # "best" file records the previous best instead of this epoch's accuracy.
    is_best = best_accuracy < top1
    if is_best:
        best_accuracy = top1
        print(best_accuracy)

    state = {'epoch': epoch,
             'model': opt.model,
             'state_dict': model.state_dict(),
             'best_acc1': best_accuracy,
             'optimizer' : optimizer.state_dict(),}

    torch.save(state, last_ckpt)
    if is_best:
        shutil.copyfile(last_ckpt, best_ckpt)



  0%|          | 0/300 [00:00<?, ?it/s]