# DenseNet-22 final Train code

### ex 1 : train dataset 150,000 = original + random crop + horizontal flip
##### (random seed = 42)

## - CIFAR-100 데이터 다운로드 및 전처리 

In [1]:
import os
import urllib.request
import tarfile
import pickle
import numpy as np

np.random.seed(42)  # ex1 random seed

def download_cifar100(save_path='cifar-100-python'):
    if os.path.exists(save_path):
        print("CIFAR-100 already downloaded.")
        return

    url = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
    filename = 'cifar-100-python.tar.gz'
    print("Downloading CIFAR-100...")
    urllib.request.urlretrieve(url, filename)

    with tarfile.open(filename, 'r:gz') as tar:
        tar.extractall()
    os.remove(filename)
    print("Download and extraction completed.")

def load_batch(filepath):
    with open(filepath, 'rb') as f:
        data_dict = pickle.load(f, encoding='bytes')
    data = data_dict[b'data']
    fine_labels = np.array(data_dict[b'fine_labels'])
    data = data.reshape(-1, 3, 32, 32)
    return data, fine_labels

def normalize_images(images):
    return images.astype(np.float32) / 255.0

def split_validation(images, labels, val_ratio=0.1):
    num_samples = images.shape[0]
    val_size = int(num_samples * val_ratio)

    idx = np.random.permutation(num_samples)
    images = images[idx]
    labels = labels[idx]

    val_images = images[:val_size]
    val_labels = labels[:val_size]
    train_images = images[val_size:]
    train_labels = labels[val_size:]

    return (train_images, train_labels), (val_images, val_labels)

def random_crop(x, crop_size=32, padding=4):
    n, c, h, w = x.shape
    padded = np.pad(x, ((0, 0), (0, 0), (padding, padding), (padding, padding)), mode='reflect')
    cropped = np.empty((n, c, crop_size, crop_size), dtype=x.dtype)
    for i in range(n):
        top = np.random.randint(0, padding * 2 + 1)
        left = np.random.randint(0, padding * 2 + 1)
        cropped[i] = padded[i, :, top:top+crop_size, left:left+crop_size]
    return cropped

def horizontal_flip(x):
    return x[:, :, :, ::-1]

def load_cifar100_dataset():
    download_cifar100()
    train_data, train_fine = load_batch('cifar-100-python/train')
    test_data, test_fine = load_batch('cifar-100-python/test')
    train_data = normalize_images(train_data)
    test_data = normalize_images(test_data)
    return (train_data, train_fine), (test_data, test_fine)

def generate_augmented_dataset(images, labels, target_size):
    N = images.shape[0]
    augmented_images = []
    augmented_labels = []
    repeat = target_size // (N * 2) + 1  

    for _ in range(repeat):
        imgs_crop = random_crop(images.copy())
        imgs_flip = horizontal_flip(imgs_crop.copy())

        augmented_images.append(imgs_crop)
        augmented_labels.append(labels.copy())

        augmented_images.append(imgs_flip)
        augmented_labels.append(labels.copy())

        if sum(x.shape[0] for x in augmented_images) >= target_size:
            break

    X = np.concatenate(augmented_images, axis=0)[:target_size]
    y = np.concatenate(augmented_labels, axis=0)[:target_size]
    return X, y

def prepare_dataset():
    (full_train_images, full_train_labels), (test_images, test_labels) = load_cifar100_dataset()
    print("Generating augmented dataset with crop + flip...")

    X_aug, y_aug = generate_augmented_dataset(full_train_images, full_train_labels, target_size=150000)
    train_aug, val_aug = split_validation(X_aug, y_aug)

    return {
        'train_cropflip': train_aug,
        'val_cropflip': val_aug,
        'test': (test_images, test_labels)
    }

data = prepare_dataset()
for k, v in data.items():
    if isinstance(v, tuple):
        print(f"{k}: {[x.shape for x in v]}")


CIFAR-100 already downloaded.
Generating augmented dataset with crop + flip...
train_cropflip: [(135000, 3, 32, 32), (135000,)]
val_cropflip: [(15000, 3, 32, 32), (15000,)]
test: [(10000, 3, 32, 32), (10000,)]


## - MiniVGGNet 모델 정의

In [2]:
import numpy as np
from common.layers import Convolution, BatchNormalization, Relu, Pooling, Affine
from common.functions import softmax, cross_entropy_error

def fake_quantize(x, num_bits=8):
    qmin, qmax = 0., 2.**num_bits - 1.
    x_min, x_max = np.min(x), np.max(x)
    if x_max == x_min:
        return x
    scale = (x_max - x_min) / (qmax - qmin)
    zero_point = np.clip(np.round(qmin - x_min / scale), qmin, qmax)
    q_x = np.clip(np.round(zero_point + x / scale), qmin, qmax)
    return scale * (q_x - zero_point)

class Flatten:
    def __init__(self):
        self.orig_shape = None

    def forward(self, x):
        self.orig_shape = x.shape
        return x.reshape(x.shape[0], -1)

    def backward(self, dout):
        return dout.reshape(self.orig_shape)


class MiniVGGNet_Modified:
    def __init__(self, input_dim=(3, 32, 32), num_classes=100):
        in_channels, _, _ = input_dim
        weight_std = np.sqrt(2. / in_channels)

        # Block 1
        self.conv1 = Convolution(np.random.randn(64, in_channels, 3, 3) * weight_std, np.zeros(64), stride=1, pad=1)
        self.bn1   = BatchNormalization(np.ones(64), np.zeros(64))
        self.relu1 = Relu()

        self.conv2 = Convolution(np.random.randn(64, 64, 3, 3) * weight_std, np.zeros(64), stride=1, pad=1)
        self.bn2   = BatchNormalization(np.ones(64), np.zeros(64))
        self.relu2 = Relu()
        self.pool1 = Pooling(2, 2, stride=2)

        # Block 2
        self.conv3 = Convolution(np.random.randn(128, 64, 3, 3) * weight_std, np.zeros(128), stride=1, pad=1)
        self.bn3   = BatchNormalization(np.ones(128), np.zeros(128))
        self.relu3 = Relu()

        self.conv4 = Convolution(np.random.randn(128, 128, 3, 3) * weight_std, np.zeros(128), stride=1, pad=1)
        self.bn4   = BatchNormalization(np.ones(128), np.zeros(128))
        self.relu4 = Relu()
        self.pool2 = Pooling(2, 2, stride=2)

        # Block 3
        self.conv5 = Convolution(np.random.randn(256, 128, 3, 3) * weight_std, np.zeros(256), stride=1, pad=1)
        self.bn5   = BatchNormalization(np.ones(256), np.zeros(256)) #conv5 also 
        self.relu5 = Relu()
        self.pool3 = Pooling(2, 2, stride=2)

        # Classifier
        self.flatten = Flatten()
        self.fc1 = Affine(np.random.randn(4096, 512) * weight_std, np.zeros(512))
        self.relu6 = Relu()
        self.fc2 = Affine(np.random.randn(512, num_classes) * 0.01, np.zeros(num_classes))

        self.layers = [
            self.conv1, self.bn1, self.relu1,
            self.conv2, self.bn2, self.relu2, self.pool1,
            self.conv3, self.bn3, self.relu3,
            self.conv4, self.bn4, self.relu4, self.pool2,
            self.conv5, self.bn5, self.relu5, self.pool3, #conv5
            self.flatten, self.fc1, self.relu6, self.fc2
        ]

    def forward(self, x, train_flg=True):
        for layer in self.layers:
            if isinstance(layer, BatchNormalization):
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)
        return x

    def backward(self, dout):
        # Trainer에서 fc2.backward(dx) 먼저 해줄 것!
        dout = self.relu6.backward(dout)
        dout = self.fc1.backward(dout)
        dout = self.flatten.backward(dout)

        dout = self.pool3.backward(dout)
        dout = self.relu5.backward(dout)
        dout = self.bn5.backward(dout)
        dout = self.conv5.backward(dout)

        dout = self.pool2.backward(dout)
        dout = self.relu4.backward(dout)
        dout = self.bn4.backward(dout)
        dout = self.conv4.backward(dout)

        dout = self.relu3.backward(dout)
        dout = self.bn3.backward(dout)
        dout = self.conv3.backward(dout)

        dout = self.pool1.backward(dout)
        dout = self.relu2.backward(dout)
        dout = self.bn2.backward(dout)
        dout = self.conv2.backward(dout)

        dout = self.relu1.backward(dout)
        dout = self.bn1.backward(dout)
        dout = self.conv1.backward(dout)

        return dout

    def gradient(self, x, t):
        # Forward
        self.loss(x, t)

        # Backward
        dout = 1
        dout = self.fc2.backward(dout)
        dout = self.relu6.backward(dout)
        dout = self.fc1.backward(dout)
        dout = self.flatten.backward(dout)

        dout = self.pool3.backward(dout)
        dout = self.relu5.backward(dout)
        dout = self.bn5.backward(dout)
        dout = self.conv5.backward(dout)

        dout = self.pool2.backward(dout)
        dout = self.relu4.backward(dout)
        dout = self.bn4.backward(dout)
        dout = self.conv4.backward(dout)

        dout = self.relu3.backward(dout)
        dout = self.bn3.backward(dout)
        dout = self.conv3.backward(dout)

        dout = self.pool1.backward(dout)
        dout = self.relu2.backward(dout)
        dout = self.bn2.backward(dout)
        dout = self.conv2.backward(dout)

        dout = self.relu1.backward(dout)
        dout = self.bn1.backward(dout)
        dout = self.conv1.backward(dout)

        # Gather gradients
        grads = {
            'W1': self.conv1.W, 'b1': self.conv1.b,
            'gamma1': self.bn1.gamma, 'beta1': self.bn1.beta,
            'W2': self.conv2.W, 'b2': self.conv2.b,
            'gamma2': self.bn2.gamma, 'beta2': self.bn2.beta,
            'W3': self.conv3.W, 'b3': self.conv3.b,
            'gamma3': self.bn3.gamma, 'beta3': self.bn3.beta,
            'W4': self.conv4.W, 'b4': self.conv4.b,
            'gamma4': self.bn4.gamma, 'beta4': self.bn4.beta,
            'W5': self.conv5.W, 'b5': self.conv5.b,
            'gamma5': self.bn5.gamma, 'beta5': self.bn5.beta,  
            'W6': self.fc1.W, 'b6': self.fc1.b,
            'W7': self.fc2.W, 'b7': self.fc2.b,
        }

        return grads


    def predict(self, x, batch_size=100):
        return np.concatenate([self.forward(x[i:i+batch_size], False) for i in range(0, x.shape[0], batch_size)], axis=0)

    def loss(self, x, t):
        y = self.forward(x, True)
        y_softmax = softmax(y)
    
        #print("logit range before softmax:", np.min(y), np.max(y))  # 로그 찍어봐
        #print("softmax range:", np.min(y_softmax), np.max(y_softmax))
    
        return cross_entropy_error(y_softmax, t)

    def accuracy(self, x, t, batch_size=100):
        pred = np.argmax(self.predict(x, batch_size), axis=1)
        true = t if t.ndim == 1 else np.argmax(t, axis=1)
        return np.mean(pred == true)

    def clip_weights(self, clip_value=1.0): 
        for layer in [self.conv1, self.conv2, self.conv3, self.conv4, self.conv5, self.fc1, self.fc2]:
            layer.W = np.clip(layer.W, -clip_value, clip_value)


## - MiniVGGNet 모델 구조 출력

In [3]:
from common.layers import Convolution, BatchNormalization, Relu, Pooling, Affine
import numpy as np

def count_params(layer):
    count = 0
    if hasattr(layer, 'W'):
        count += np.prod(layer.W.shape)
    if hasattr(layer, 'b'):
        count += np.prod(layer.b.shape)
    if hasattr(layer, 'gamma'):
        count += np.prod(layer.gamma.shape)
    if hasattr(layer, 'beta'):
        count += np.prod(layer.beta.shape)
    return count

def print_vggnet_summary(model, input_shape=(1, 3, 32, 32)):
    print("=" * 75)
    print(f"{'Layer (type)':<35}{'Output Shape':<25}{'Param #':>10}")
    print("=" * 75)

    x = np.zeros(input_shape)
    total_params = 0
    layer_idx = 1

    def log(name, x, p):
        nonlocal total_params, layer_idx
        print(f"{layer_idx:>2}. {name:<32}{str(x.shape):<25}{p:>10,}")
        total_params += p
        layer_idx += 1

    # Block 1
    x = model.conv1.forward(x)
    log("Conv1", x, count_params(model.conv1))
    x = model.bn1.forward(x, train_flg=False)
    log("BN1", x, count_params(model.bn1))
    x = model.relu1.forward(x)

    x = model.conv2.forward(x)
    log("Conv2", x, count_params(model.conv2))
    x = model.bn2.forward(x, train_flg=False)
    log("BN2", x, count_params(model.bn2))
    x = model.relu2.forward(x)
    x = model.pool1.forward(x)

    # Block 2
    x = model.conv3.forward(x)
    log("Conv3", x, count_params(model.conv3))
    x = model.bn3.forward(x, train_flg=False)
    log("BN3", x, count_params(model.bn3))
    x = model.relu3.forward(x)

    x = model.conv4.forward(x)
    log("Conv4", x, count_params(model.conv4))
    x = model.bn4.forward(x, train_flg=False)
    log("BN4", x, count_params(model.bn4))
    x = model.relu4.forward(x)
    x = model.pool2.forward(x)

    # Block 3
    x = model.conv5.forward(x)
    log("Conv5", x, count_params(model.conv5))
    x = model.relu5.forward(x)
    x = model.pool3.forward(x)

    # Flatten
    x = x.reshape(x.shape[0], -1)
    log("Flatten", x, 0)

    # FC layers
    x = model.fc1.forward(x)
    log("FC1", x, count_params(model.fc1))
    x = model.relu6.forward(x)
    x = model.fc2.forward(x)
    log("FC2", x, count_params(model.fc2))

    print("=" * 75)
    print(f"{'Total weight layers:':<60}{layer_idx}")
    print(f"{'Total params:':<60}{total_params:,}")
    print("=" * 75)



model = MiniVGGNet_Modified()
print_vggnet_summary(model)

Layer (type)                       Output Shape                Param #
 1. Conv1                           (1, 64, 32, 32)               1,792
 2. BN1                             (1, 64, 32, 32)                 128
 3. Conv2                           (1, 64, 32, 32)              36,928
 4. BN2                             (1, 64, 32, 32)                 128
 5. Conv3                           (1, 128, 16, 16)             73,856
 6. BN3                             (1, 128, 16, 16)                256
 7. Conv4                           (1, 128, 16, 16)            147,584
 8. BN4                             (1, 128, 16, 16)                256
 9. Conv5                           (1, 256, 8, 8)              295,168
10. Flatten                         (1, 4096)                         0
11. FC1                             (1, 512)                  2,097,664
12. FC2                             (1, 100)                     51,300
Total weight layers:                                        13
To

## - MiniVGGNet 모델학습

In [4]:
import time
import pickle
import numpy as np
from common.optimizer import Adam
from common.functions import softmax

def smooth_labels(y, smoothing=0.1, num_classes=100):
    confidence = 1.0 - smoothing
    label_shape = (y.shape[0], num_classes)
    smooth = np.full(label_shape, smoothing / (num_classes - 1))
    smooth[np.arange(y.shape[0]), y] = confidence
    return smooth

class Trainer:
    def __init__(self, model, model_name,
                 train_data, val_data, test_data,
                 epochs=20, batch_size=64, lr=0.01,
                 smoothing=0.15):
        self.model = model
        self.model_name = model_name
        self.train_x, self.train_t = train_data
        self.val_x, self.val_t = val_data
        self.test_x, self.test_t = test_data
        self.epochs = epochs
        self.batch_size = batch_size
        self.lr = lr
        self.smoothing = smoothing

        self.train_size = self.train_x.shape[0]
        self.iter_per_epoch = max(self.train_size // self.batch_size, 1)
        self.optimizer = Adam(lr=lr)

        self.train_loss_list = []
        self.val_loss_list = []
        self.train_acc_list = []
        self.val_acc_list = []

    def smooth_labels(self, y, num_classes=100):
        confidence = 1.0 - self.smoothing
        label_shape = (y.shape[0], num_classes)
        smooth = np.full(label_shape, self.smoothing / (num_classes - 1), dtype=np.float32)
        smooth[np.arange(y.shape[0]), y] = confidence
        return smooth

    def loss_grad(self, x, t):
        y = self.model.forward(x, train_flg=True)
        batch_size = x.shape[0]
        if t.size == y.size:
            dx = (softmax(y) - t) / batch_size
        else:
            dx = softmax(y)
            dx[np.arange(batch_size), t] -= 1
            dx /= batch_size
        return dx, y

    def get_param_dict_and_grad(self):
        param_dict, grad_dict = {}, {}

        for name in ['conv1', 'conv2', 'conv3', 'conv4', 'conv5', 'fc1', 'fc2']:
            layer = getattr(self.model, name)
            if hasattr(layer, 'W'):
                param_dict[f'{name}_W'] = layer.W
                param_dict[f'{name}_b'] = layer.b
                grad_dict[f'{name}_W'] = layer.dW
                grad_dict[f'{name}_b'] = layer.db

        return param_dict, grad_dict


    def train_step(self):
        batch_mask = np.random.choice(self.train_size, self.batch_size)
        x_batch = self.train_x[batch_mask]
        t_batch = self.train_t[batch_mask]

        if t_batch.ndim == 1:
            t_batch = self.smooth_labels(t_batch)

        loss = self.model.loss(x_batch, t_batch)
        dx, _ = self.loss_grad(x_batch, t_batch)
        dout = self.model.fc2.backward(dx)

        # 나머지는 모델 내부 backward 흐름에 위임 (fc2는 제외된 상태여야 함)
        self.model.backward(dout)

        if hasattr(self.model, 'clip_weights'):
            self.model.clip_weights(clip_value=1.0)

        params, grads = self.get_param_dict_and_grad()
        self.optimizer.update(params, grads)

        return loss

    def train(self):
        patience = 10
        best_val_loss = float('inf')
        no_improve_count = 0

        for epoch in range(self.epochs):
            print(f"\n[Epoch {epoch + 1}/{self.epochs}]", flush=True)
            epoch_loss = 0
            start_time = time.time()

            for i in range(self.iter_per_epoch):
                loss = self.train_step()
                epoch_loss += loss
                if i % 10 == 0 or i == self.iter_per_epoch - 1:
                    print(f"  Iter {i+1:3d}/{self.iter_per_epoch}: Loss {loss:.4f}", flush=True)

            avg_loss = epoch_loss / self.iter_per_epoch
            self.train_loss_list.append(avg_loss)

            train_acc = self.model.accuracy(self.train_x[:1000], self.train_t[:1000])
            val_acc = self.model.accuracy(self.val_x, self.val_t)
            val_loss = self.batched_loss(self.val_x, self.val_t, batch_size=128)

            self.train_acc_list.append(train_acc)
            self.val_acc_list.append(val_acc)
            self.val_loss_list.append(val_loss)

            elapsed = time.time() - start_time
            print(f"Fine Train Loss: {avg_loss:.4f}, Fine Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Val Loss: {val_loss:.4f}", flush=True)
            print(f"Time: {elapsed:.2f}s", flush=True)

            if (epoch + 1) % 5 == 0:
                self.save_model(f"{self.model_name}_epoch{epoch+1}.pkl")
                print(f">>> Model saved to {self.model_name}_epoch{epoch+1}.pkl", flush=True)

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                no_improve_count = 0
                self.save_model(f"{self.model_name}_best.pkl")
            else:
                no_improve_count += 1
                if no_improve_count >= patience:
                    print(f"Early stopping at epoch {epoch+1}")
                    break

    def batched_loss(self, x, t, batch_size=128):
        total_loss = 0.0
        total_count = 0
        num_classes = 100  # CIFAR-100 기준
    
        for i in range(0, len(x), batch_size):
            x_batch = x[i:i+batch_size]
            t_batch = t[i:i+batch_size]
    
            # 정수형이면 one-hot으로 변환
            if t_batch.ndim == 1:
                t_onehot = np.zeros((t_batch.size, num_classes), dtype=np.float32)
                t_onehot[np.arange(t_batch.size), t_batch] = 1.0
            else:
                t_onehot = t_batch  # 이미 one-hot이면 그대로
    
            loss = self.model.loss(x_batch, t_onehot)
            total_loss += loss * len(x_batch)
            total_count += len(x_batch)
    
        return total_loss / total_count

    def save_model(self, filename):
        params, _ = self.get_param_dict_and_grad()
        model_state = {k: v.copy() for k, v in params.items()}

        optimizer_state = {
            'lr': self.optimizer.lr,
            'beta1': self.optimizer.beta1,
            'beta2': self.optimizer.beta2,
            'm': self.optimizer.m,
            'v': self.optimizer.v,
            't': self.optimizer.iter
        }

        save_data = {
            'model': model_state,
            'optimizer': optimizer_state,
            'train_loss_list': self.train_loss_list,
            'train_acc_list': self.train_acc_list,
            'val_acc_list': self.val_acc_list,
            'val_loss_list': self.val_loss_list
        }

        with open(filename, 'wb') as f:
            pickle.dump(save_data, f)


    def save_log(self, filename='log.npz'):
        np.savez(filename,
                 loss=np.array(self.train_loss_list),
                 train_acc=np.array(self.train_acc_list),
                 val_acc=np.array(self.val_acc_list),
                 val_loss=np.array(self.val_loss_list))
        print(f"Log saved to {filename}", flush=True)


## - MiniVGGNet 모델 학습

In [5]:
def normalize_vgg_style(x):
    mean = np.array([0.5071, 0.4865, 0.4409]).reshape(1, 3, 1, 1)
    std  = np.array([0.2673, 0.2564, 0.2762]).reshape(1, 3, 1, 1)
    return (x - mean) / std

In [None]:
print("\n==== Running ex4 : train dataset 150,000 = original +BN (O) ====")
model = MiniVGGNet_Modified()

x_train, y_train = data['train_cropflip']
x_val, y_val = data['val_cropflip']
x_test, y_test = data['test']

x_train, y_train = x_train[:20000], y_train[:20000]
x_val, y_val     = x_val[:5000], y_val[:5000]
x_test, y_test   = x_test[:10000], y_test[:10000]


x_train = normalize_vgg_style(x_train)
x_val = normalize_vgg_style(x_val)
x_test = normalize_vgg_style(x_test)

trainer = Trainer(
    model=model,
    model_name='vgg_ex4',
    train_data=(x_train, y_train),
    val_data=(x_val, y_val),
    test_data=(x_test, y_test),
    epochs=100,
    batch_size=64,
    lr=0.001,
    smoothing=0.1
)

trainer.train()
trainer.save_log("miniVGG_baseline_ex4_log.npz")



==== Running ex4 : train dataset 150,000 = original +BN (O) ====

[Epoch 1/100]
  Iter   1/312: Loss 14.2077
  Iter  11/312: Loss 12.2191
  Iter  21/312: Loss 8.7956
  Iter  31/312: Loss 6.7306
  Iter  41/312: Loss 5.3680
  Iter  51/312: Loss 4.9275
  Iter  61/312: Loss 4.4535
  Iter  71/312: Loss 4.1258
  Iter  81/312: Loss 4.5980
  Iter  91/312: Loss 3.7955
  Iter 101/312: Loss 4.1181
  Iter 111/312: Loss 4.1928
  Iter 121/312: Loss 4.0127
  Iter 131/312: Loss 4.2075
  Iter 141/312: Loss 3.9046
  Iter 151/312: Loss 3.7905
  Iter 161/312: Loss 4.0537
  Iter 171/312: Loss 3.7469
  Iter 181/312: Loss 3.9985
  Iter 191/312: Loss 3.7527
  Iter 201/312: Loss 3.7788
  Iter 211/312: Loss 3.6829
  Iter 221/312: Loss 3.6891
  Iter 231/312: Loss 3.4636
  Iter 241/312: Loss 3.4301
  Iter 251/312: Loss 3.6785
  Iter 261/312: Loss 3.5381
  Iter 271/312: Loss 3.0091
  Iter 281/312: Loss 3.6153
  Iter 291/312: Loss 3.6438
  Iter 301/312: Loss 3.5661
  Iter 311/312: Loss 3.3126
  Iter 312/312: Loss 

## - 학습 결과 시각화

In [1]:
import matplotlib.pyplot as plt
import numpy as np
from common.util import smooth_curve

log = np.load("miniVGG_baseline_ex4_log.npz")
train_loss = log["loss"]
train_acc = log["train_acc"]
val_acc = log["val_acc"]
val_loss = log["val_loss"]

epochs = range(1, len(train_loss) + 1)
smoothed_epochs = range(1, len(smooth_curve(train_loss)) + 1)

plt.figure()
plt.plot(smoothed_epochs, smooth_curve(train_loss), label="Train Loss")
plt.plot(smoothed_epochs, smooth_curve(val_loss), label="Val Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.title("Learning Curve (Loss)")
plt.grid(True)
plt.show()

plt.figure()
plt.plot(smoothed_epochs, smooth_curve(train_acc), label="Train Acc")
plt.plot(smoothed_epochs, smooth_curve(val_acc), label="Val Acc")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.title("Learning Curve (Accuracy)")
plt.grid(True)
plt.show()

FileNotFoundError: [Errno 2] No such file or directory: 'miniVGG_baseline_ex2_log.npz'

In [15]:
import matplotlib.pyplot as plt
import numpy as np
from common.util import smooth_curve

log = np.load("miniVGG_baseline_ex3_log.npz")
train_loss = log["loss"]
train_acc = log["train_acc"]
val_acc = log["val_acc"]
val_loss = log["val_loss"]

epochs = range(1, len(train_loss) + 1)
smoothed_epochs = range(1, len(smooth_curve(train_loss)) + 1)

plt.figure()
plt.plot(smoothed_epochs, smooth_curve(train_loss), label="Train Loss")
plt.plot(smoothed_epochs, smooth_curve(val_loss), label="Val Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.title("Learning Curve (Loss)")
plt.grid(True)
plt.savefig("learning_curve_loss.png", dpi=300)
plt.close()

plt.figure()
plt.plot(smoothed_epochs, smooth_curve(train_acc), label="Train Acc")
plt.plot(smoothed_epochs, smooth_curve(val_acc), label="Val Acc")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.title("Learning Curve (Accuracy)")
plt.grid(True)
plt.savefig("learning_curve_accuracy.png", dpi=300)
plt.close()