# ResNet-20 final Train code

### ex 3 : train dataset 100,000 = original + horizontal crop
##### (random seed = 104729)

## - CIFAR-100 데이터 다운로드 및 전처리 

In [1]:
import os
import urllib.request
import tarfile
import pickle
import numpy as np

np.random.seed(104729)  # ex3 random seed

def download_cifar100(save_path='cifar-100-python'):
    if os.path.exists(save_path):
        print("CIFAR-100 already downloaded.")
        return

    url = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
    filename = 'cifar-100-python.tar.gz'
    print("Downloading CIFAR-100...")
    urllib.request.urlretrieve(url, filename)

    with tarfile.open(filename, 'r:gz') as tar:
        tar.extractall()
    os.remove(filename)
    print("Download and extraction completed.")

def load_batch(filepath):
    with open(filepath, 'rb') as f:
        data_dict = pickle.load(f, encoding='bytes')
    data = data_dict[b'data']
    fine_labels = np.array(data_dict[b'fine_labels'])
    data = data.reshape(-1, 3, 32, 32)
    return data, fine_labels

def normalize_images(images):
    return images.astype(np.float32) / 255.0

def split_validation(images, labels, val_ratio=0.1):
    num_samples = images.shape[0]
    val_size = int(num_samples * val_ratio)

    idx = np.random.permutation(num_samples)
    images = images[idx]
    labels = labels[idx]

    val_images = images[:val_size]
    val_labels = labels[:val_size]
    train_images = images[val_size:]
    train_labels = labels[val_size:]

    return (train_images, train_labels), (val_images, val_labels)

def random_crop(x, crop_size=32, padding=4):
    n, c, h, w = x.shape
    padded = np.pad(x, ((0, 0), (0, 0), (padding, padding), (padding, padding)), mode='reflect')
    cropped = np.empty((n, c, crop_size, crop_size), dtype=x.dtype)
    for i in range(n):
        top = np.random.randint(0, padding * 2 + 1)
        left = np.random.randint(0, padding * 2 + 1)
        cropped[i] = padded[i, :, top:top+crop_size, left:left+crop_size]
    return cropped

def horizontal_flip(x):
    return x[:, :, :, ::-1]

def load_cifar100_dataset():
    download_cifar100()
    train_data, train_fine = load_batch('cifar-100-python/train')
    test_data, test_fine = load_batch('cifar-100-python/test')
    train_data = normalize_images(train_data)
    test_data = normalize_images(test_data)
    return (train_data, train_fine), (test_data, test_fine)

def generate_augmented_dataset(images, labels, target_size):
    N = images.shape[0]
    augmented_images = []
    augmented_labels = []
    repeat = target_size // (N * 2) + 1  # original + flip

    for _ in range(repeat):
        imgs_original = images.copy()
        imgs_flip = horizontal_flip(images.copy())

        augmented_images.append(imgs_original)
        augmented_labels.append(labels.copy())

        augmented_images.append(imgs_flip)
        augmented_labels.append(labels.copy())

        if sum(x.shape[0] for x in augmented_images) >= target_size:
            break

    X = np.concatenate(augmented_images, axis=0)[:target_size]
    y = np.concatenate(augmented_labels, axis=0)[:target_size]
    return X, y

def prepare_dataset():
    (full_train_images, full_train_labels), (test_images, test_labels) = load_cifar100_dataset()
    print("Generating augmented dataset with horizontal flip only...")

    X_aug, y_aug = generate_augmented_dataset(full_train_images, full_train_labels, target_size=100000)
    train_aug, val_aug = split_validation(X_aug, y_aug)

    return {
        'train_flip': train_aug,
        'val_flip': val_aug,
        'test': (test_images, test_labels)
    }

data = prepare_dataset()
for k, v in data.items():
    if isinstance(v, tuple):
        print(f"{k}: {[x.shape for x in v]}")


CIFAR-100 already downloaded.
Generating augmented dataset with horizontal flip only...
train_flip: [(90000, 3, 32, 32), (90000,)]
val_flip: [(10000, 3, 32, 32), (10000,)]
test: [(10000, 3, 32, 32), (10000,)]


## - ResNet-20 모델 구조 출력

In [2]:
from common.ResNet20 import ResNet20

def count_params(layer):
    count = 0
    if hasattr(layer, 'W'):
        count += np.prod(layer.W.shape)
    if hasattr(layer, 'b'):
        count += np.prod(layer.b.shape)
    return count

def print_resnet20_summary(model, input_shape=(1, 3, 32, 32)):
    print("=" * 75, flush=True)
    print(f"{'Layer (type)':<35}{'Output Shape':<25}{'Param #':>10}", flush=True)
    print("=" * 75, flush=True)

    x = np.zeros(input_shape)
    total_params = 0
    layer_idx = 1

    x = model.conv1.forward(x)
    p = count_params(model.conv1)
    print(f"{layer_idx:>2}. {'Conv1':<32}{str(x.shape):<25}{p:>10,}", flush=True)
    total_params += p
    layer_idx += 1

    x = model.bn1.forward(x, train_flg=False)
    x = model.relu1.forward(x)

    for i, layer_block in enumerate([model.layer1, model.layer2, model.layer3]):
        for j, block in enumerate(layer_block):
            residual = x.copy()

            # Conv1
            x = block.conv1.forward(x)
            p = count_params(block.conv1)
            name = f"Block[{i+1}-{j+1}]_Conv1"
            print(f"{layer_idx:>2}. {name:<32}{str(x.shape):<25}{p:>10,}", flush=True)
            total_params += p
            layer_idx += 1

            x = block.bn1.forward(x, train_flg=False)
            x = block.relu1.forward(x)

            x = block.conv2.forward(x)
            p = count_params(block.conv2)
            name = f"Block[{i+1}-{j+1}]_Conv2"
            print(f"{layer_idx:>2}. {name:<32}{str(x.shape):<25}{p:>10,}", flush=True)
            total_params += p
            layer_idx += 1

            x = block.bn2.forward(x, train_flg=False)

            if not block.equal_in_out:
                x_sc = block.shortcut.forward(residual)
                p = count_params(block.shortcut)
                name = f"└─ Shortcut[{i+1}-{j+1}]"
                print(f"{'':>3} {name:<32}{str(x_sc.shape):<25}{p:>10,}", flush=True)
                total_params += p
                x = x + x_sc
                x = block.bn_shortcut.forward(x, train_flg=False)
            else:
                x = x + residual

            x = block.relu2.forward(x)

    x = x.mean(axis=(2, 3))
    print(f"{'':>3} {'GlobalAvgPool':<32}{str(x.shape):<25}{'0':>10}", flush=True)

    x = model.fc.forward(x)
    p = count_params(model.fc)
    print(f"{layer_idx:>2}. {'FC':<32}{str(x.shape):<25}{p:>10,}", flush=True)
    total_params += p

    print("=" * 75, flush=True)
    print(f"{'Total weight layers:':<60}{'20'}", flush=True)
    print(f"{'Total params:':<60}{total_params:,}", flush=True)
    print("=" * 75, flush=True)

model = ResNet20()
print_resnet20_summary(model, input_shape=(1, 3, 32, 32))

Layer (type)                       Output Shape                Param #
 1. Conv1                           (1, 16, 32, 32)                 448
 2. Block[1-1]_Conv1                (1, 16, 32, 32)               2,320
 3. Block[1-1]_Conv2                (1, 16, 32, 32)               2,320
 4. Block[1-2]_Conv1                (1, 16, 32, 32)               2,320
 5. Block[1-2]_Conv2                (1, 16, 32, 32)               2,320
 6. Block[1-3]_Conv1                (1, 16, 32, 32)               2,320
 7. Block[1-3]_Conv2                (1, 16, 32, 32)               2,320
 8. Block[2-1]_Conv1                (1, 32, 16, 16)               4,640
 9. Block[2-1]_Conv2                (1, 32, 16, 16)               9,248
    └─ Shortcut[2-1]                (1, 32, 16, 16)                 544
10. Block[2-2]_Conv1                (1, 32, 16, 16)               9,248
11. Block[2-2]_Conv2                (1, 32, 16, 16)               9,248
12. Block[2-3]_Conv1                (1, 32, 16, 16)              

## - ResNet-20 모델 학습

In [3]:
import time
from common.optimizer import Adam
from common.functions import softmax

# label smoothing
def smooth_labels(y, smoothing=0.1, num_classes=100):
    confidence = 1.0 - smoothing
    label_shape = (y.shape[0], num_classes)
    smooth = np.full(label_shape, smoothing / (num_classes - 1))
    smooth[np.arange(y.shape[0]), y] = confidence
    return smooth

class Trainer:
    def __init__(self, model, model_name,
                 train_data, val_data, test_data,
                 epochs=20, batch_size=64, lr=0.01,
                 smoothing=0.15):

        self.model = model
        self.model_name = model_name
        self.train_x, self.train_t = train_data
        self.val_x, self.val_t = val_data
        self.test_x, self.test_t = test_data

        self.epochs = epochs
        self.batch_size = batch_size
        self.lr = lr
        self.smoothing = smoothing

        self.train_size = self.train_x.shape[0]
        self.iter_per_epoch = max(self.train_size // self.batch_size, 1)

        self.optimizer = Adam(lr=lr)

        self.train_loss_list = []
        self.val_loss_list = []
        self.train_acc_list = []
        self.val_acc_list = []

    def smooth_labels(self, y, num_classes=100):
        confidence = 1.0 - self.smoothing
        label_shape = (y.shape[0], num_classes)
        smooth = np.full(label_shape, self.smoothing / (num_classes - 1), dtype=np.float32)
        smooth[np.arange(y.shape[0]), y] = confidence
        return smooth

    def loss_grad(self, x, t):
        y = self.model.forward(x, train_flg=True)
        batch_size = x.shape[0]
        if t.size == y.size:
            dx = (softmax(y) - t) / batch_size
        else:
            dx = softmax(y)
            dx[np.arange(batch_size), t] -= 1
            dx /= batch_size
        return dx, y

    def get_param_dict_and_grad(self):
        param_dict, grad_dict = {}, {}
        if hasattr(self.model.fc, 'W'):
            param_dict['fc_W'] = self.model.fc.W
            param_dict['fc_b'] = self.model.fc.b
            grad_dict['fc_W'] = self.model.fc.dW
            grad_dict['fc_b'] = self.model.fc.db

        idx = 0
        for layer in self.model.layer1 + self.model.layer2 + self.model.layer3:
            for attr in ['conv1', 'conv2', 'shortcut']:
                if hasattr(layer, attr):
                    conv = getattr(layer, attr)
                    param_dict[f'{idx}_W'] = conv.W
                    param_dict[f'{idx}_b'] = conv.b
                    grad_dict[f'{idx}_W'] = conv.dW
                    grad_dict[f'{idx}_b'] = conv.db
                    idx += 1
        return param_dict, grad_dict

    def train_step(self):
        batch_mask = np.random.choice(self.train_size, self.batch_size)
        x_batch = self.train_x[batch_mask]
        t_batch = self.train_t[batch_mask]

        if t_batch.ndim == 1:
            t_batch = self.smooth_labels(t_batch)

        loss = self.model.loss(x_batch, t_batch)
        dx, y = self.loss_grad(x_batch, t_batch)
        self.model.backward(dx)

        if hasattr(self.model, 'clip_weights'):
            self.model.clip_weights(clip_value=1.0)

        params, grads = self.get_param_dict_and_grad()
        self.optimizer.update(params, grads)

        return loss

    def train(self):
        patience = 10
        best_val_loss = float('inf')
        no_improve_count = 0
    
        for epoch in range(self.epochs):
            print(f"\n[Epoch {epoch + 1}/{self.epochs}]", flush=True)
            epoch_loss = 0
            start_time = time.time()

            for i in range(self.iter_per_epoch):
                loss = self.train_step()
                epoch_loss += loss
                if i % 10 == 0 or i == self.iter_per_epoch - 1:
                    print(f"  Iter {i+1:3d}/{self.iter_per_epoch}: Loss {loss:.4f}", flush=True)

            avg_loss = epoch_loss / self.iter_per_epoch
            self.train_loss_list.append(avg_loss)

            train_acc = self.model.accuracy(self.train_x[:1000], self.train_t[:1000])
            val_acc = self.model.accuracy(self.val_x, self.val_t)

            val_loss = self.batched_loss(self.val_x, self.val_t, batch_size=128)

            self.train_acc_list.append(train_acc)
            self.val_acc_list.append(val_acc)
            self.val_loss_list.append(val_loss)

            elapsed = time.time() - start_time
            print(f"Fine Train Loss: {avg_loss:.4f}, Fine Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Val Loss: {val_loss:.4f}", flush=True)
            print(f"Time: {elapsed:.2f}s", flush=True)

            if (epoch + 1) % 5 == 0:
                model_filename = f"{self.model_name}_epoch{epoch+1}.pkl"
                self.save_model(model_filename)
                print(f">>> Model saved to {model_filename}", flush=True)
            
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                no_improve_count = 0
                self.save_model(f"{self.model_name}_best.pkl")
            else:
                no_improve_count += 1
                if no_improve_count >= patience:
                    print(f"Early stopping at epoch {epoch+1}")
                    break
            

    def batched_loss(self, x, t, batch_size=128):
        total_loss = 0.0
        total_count = 0
        for i in range(0, len(x), batch_size):
            x_batch = x[i:i+batch_size]
            t_batch = t[i:i+batch_size]
            loss = self.model.loss(x_batch, t_batch)
            total_loss += loss * len(x_batch)
            total_count += len(x_batch)
        return total_loss / total_count

    def save_model(self, filename):
        params, _ = self.get_param_dict_and_grad()
        model_state = {k: v.copy() for k, v in params.items()}

        model_state['conv1_W'] = self.model.conv1.W.copy()
        model_state['conv1_b'] = self.model.conv1.b.copy()

        def extract_bn_params(model):
            bn_params = {}
            bn_count = 0
            for layer in model.layer1 + model.layer2 + model.layer3:
                for bn_attr in ['bn1', 'bn2']:
                    if hasattr(layer, bn_attr):
                        bn = getattr(layer, bn_attr)
                        bn_params[f'{bn_count}_gamma'] = bn.gamma.copy()
                        bn_params[f'{bn_count}_beta'] = bn.beta.copy()
                        bn_params[f'{bn_count}_running_mean'] = bn.running_mean.copy()
                        bn_params[f'{bn_count}_running_var'] = bn.running_var.copy()
                        bn_count += 1
                if hasattr(layer, 'bn_shortcut'):
                    bn = layer.bn_shortcut
                    bn_params[f'{bn_count}_gamma'] = bn.gamma.copy()
                    bn_params[f'{bn_count}_beta'] = bn.beta.copy()
                    bn_params[f'{bn_count}_running_mean'] = bn.running_mean.copy()
                    bn_params[f'{bn_count}_running_var'] = bn.running_var.copy()
                    bn_count += 1
            bn = model.bn1
            bn_params[f'{bn_count}_gamma'] = bn.gamma.copy()
            bn_params[f'{bn_count}_beta'] = bn.beta.copy()
            bn_params[f'{bn_count}_running_mean'] = bn.running_mean.copy()
            bn_params[f'{bn_count}_running_var'] = bn.running_var.copy()
            return bn_params

        model_state.update(extract_bn_params(self.model))

        optimizer_state = {
            'lr': self.optimizer.lr,
            'beta1': self.optimizer.beta1,
            'beta2': self.optimizer.beta2,
            'm': self.optimizer.m,
            'v': self.optimizer.v,
            't': self.optimizer.iter
        }

        save_data = {
            'model': model_state,
            'optimizer': optimizer_state,
            'train_loss_list': self.train_loss_list,
            'train_acc_list': self.train_acc_list,
            'val_acc_list': self.val_acc_list,
            'val_loss_list': self.val_loss_list
        }

        with open(filename, 'wb') as f:
            pickle.dump(save_data, f)

    def save_log(self, filename='log.npz'):
        np.savez(filename,
                 loss=np.array(self.train_loss_list),
                 train_acc=np.array(self.train_acc_list),
                 val_acc=np.array(self.val_acc_list),
                 val_loss=np.array(self.val_loss_list))
        print(f"Log saved to {filename}", flush=True)

## - ResNet-20_ex1 모델 학습

In [None]:
print("\n==== Running ex3 : train dataset 100,000 = original + horizontal flip ====")
model = ResNet20()

x_train, y_train = data['train_flip']
x_val, y_val = data['val_flip']
x_test, y_test = data['test']

trainer = Trainer(
    model=model,
    model_name='ResNet-20_ex3',
    train_data=(x_train, y_train),
    val_data=(x_val, y_val),
    test_data=(x_test, y_test),
    epochs=100,
    batch_size=64,
    lr=0.01,
    smoothing=0.15
)
trainer.train()
trainer.save_log("ResNet-20_ex3_log.npz")


==== Running ex3 : train dataset 100,000 = original + horizontal flip ====

[Epoch 1/100]
  Iter   1/1406: Loss 5.3838
  Iter  11/1406: Loss 4.6716
  Iter  21/1406: Loss 4.6090
  Iter  31/1406: Loss 4.5481
  Iter  41/1406: Loss 4.5677
  Iter  51/1406: Loss 4.3817
  Iter  61/1406: Loss 4.3026
  Iter  71/1406: Loss 4.0237
  Iter  81/1406: Loss 4.1144
  Iter  91/1406: Loss 4.0962
  Iter 101/1406: Loss 3.9886
  Iter 111/1406: Loss 3.9998
  Iter 121/1406: Loss 4.1299
  Iter 131/1406: Loss 3.8621
  Iter 141/1406: Loss 3.9984
  Iter 151/1406: Loss 4.0580
  Iter 161/1406: Loss 4.1268
  Iter 171/1406: Loss 3.8738
  Iter 181/1406: Loss 4.0512
  Iter 191/1406: Loss 4.0164
  Iter 201/1406: Loss 4.0057
  Iter 211/1406: Loss 3.7973
  Iter 221/1406: Loss 3.9423
  Iter 231/1406: Loss 3.8715
  Iter 241/1406: Loss 3.7846
  Iter 251/1406: Loss 3.8666
  Iter 261/1406: Loss 3.9779
  Iter 271/1406: Loss 3.9549
  Iter 281/1406: Loss 4.0291
  Iter 291/1406: Loss 3.9358
  Iter 301/1406: Loss 3.4741
  Iter 311

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from common.util import smooth_curve

log = np.load("ResNet-20_ex3_log.npz")
train_loss = log["loss"]
train_acc = log["train_acc"]
val_acc = log["val_acc"]
val_loss = log["val_loss"]

epochs = range(1, len(train_loss) + 1)
smoothed_epochs = range(1, len(smooth_curve(train_loss)) + 1)

plt.figure()
plt.plot(smoothed_epochs, smooth_curve(train_loss), label="Train Loss (smoothed)")
plt.plot(smoothed_epochs, smooth_curve(val_loss), label="Val Loss (smoothed)")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.title("Learning Curve (Loss)")
plt.grid(True)
plt.show()

plt.figure()
plt.plot(smoothed_epochs, smooth_curve(train_acc), label="Train Acc (smoothed)")
plt.plot(smoothed_epochs, smooth_curve(val_acc), label="Val Acc (smoothed)")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.title("Learning Curve (Accuracy)")
plt.grid(True)
plt.show()