# DenseNet-22 final Train code

### ex 3 : train dataset 100,000 = original + horizontal flip
##### (random seed = 42)

## - CIFAR-100 데이터 다운로드 및 전처리 

In [None]:
import os
import urllib.request
import tarfile
import pickle
import numpy as np

# Seed NumPy's global RNG so that later np.random calls in this file
# (e.g. the permutation in split_validation) are reproducible across runs.
np.random.seed(42) 

def download_cifar100(save_path='cifar-100-python'):
    """Fetch and unpack the CIFAR-100 python archive unless it is already present.

    Args:
        save_path: Path whose existence means "already downloaded". It is only
            used for that check; the archive is always extracted into the
            current working directory.
    """
    already_present = os.path.exists(save_path)
    if not already_present:
        source_url = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
        archive_name = 'cifar-100-python.tar.gz'
        print("Downloading CIFAR-100...")
        urllib.request.urlretrieve(source_url, archive_name)

        with tarfile.open(archive_name, 'r:gz') as archive:
            archive.extractall()
        # The compressed archive is not needed once it has been unpacked.
        os.remove(archive_name)
        print("Download and extraction completed.")
    else:
        print("CIFAR-100 already downloaded.")

def load_batch(filepath):
    """Load one pickled CIFAR-100 batch file.

    Args:
        filepath: Path to a pickle holding a dict with the byte-string keys
            ``b'data'`` and ``b'fine_labels'``.

    Returns:
        ``(images, fine_labels)`` where ``images`` is the ``b'data'`` array
        reshaped to ``(-1, 3, 32, 32)`` and ``fine_labels`` is a NumPy array.
    """
    # NOTE: pickle.load executes arbitrary code; only use on trusted files.
    with open(filepath, 'rb') as handle:
        batch = pickle.load(handle, encoding='bytes')
    raw_pixels = batch[b'data']
    labels = np.array(batch[b'fine_labels'])
    return raw_pixels.reshape(-1, 3, 32, 32), labels

def normalize_images(images):
    """Return ``images`` cast to float32 and divided by 255."""
    as_float = images.astype(np.float32)
    return as_float / 255.0

def split_validation(images, labels, val_ratio=0.1):
    """Randomly partition a dataset into training and validation parts.

    A single permutation from NumPy's global RNG decides the order; the first
    ``int(num_samples * val_ratio)`` shuffled samples become the validation set.

    Args:
        images: Array indexed by sample along axis 0.
        labels: Array of labels aligned with ``images``.
        val_ratio: Fraction of samples assigned to validation.

    Returns:
        ``((train_images, train_labels), (val_images, val_labels))``.
    """
    total = images.shape[0]
    n_val = int(total * val_ratio)

    order = np.random.permutation(total)
    val_idx = order[:n_val]
    train_idx = order[n_val:]

    train_part = (images[train_idx], labels[train_idx])
    val_part = (images[val_idx], labels[val_idx])
    return train_part, val_part

def random_crop(x, crop_size=32, padding=4):
    """Reflect-pad each image and cut out a randomly positioned square crop.

    The offset range is derived from the padded image size, so every valid
    window position can be drawn even when ``crop_size`` differs from the
    input height/width. When ``crop_size`` equals the input size (the default
    CIFAR use case) the range is ``[0, 2 * padding]``.

    Args:
        x: Array of shape ``(n, c, h, w)``.
        crop_size: Side length of the square crop.
        padding: Number of reflected pixels added on each spatial side.

    Returns:
        Array of shape ``(n, c, crop_size, crop_size)`` with the dtype of ``x``.

    Raises:
        ValueError: If the crop does not fit inside the padded image.
    """
    n, c, h, w = x.shape
    max_top = h + 2 * padding - crop_size
    max_left = w + 2 * padding - crop_size
    if max_top < 0 or max_left < 0:
        raise ValueError(
            f"crop_size={crop_size} does not fit in a padded image of size "
            f"({h + 2 * padding}, {w + 2 * padding})")

    padded = np.pad(x, ((0, 0), (0, 0), (padding, padding), (padding, padding)), mode='reflect')
    cropped = np.empty((n, c, crop_size, crop_size), dtype=x.dtype)
    for i in range(n):
        # Draw top first, then left, one image at a time (keeps the RNG
        # consumption order stable for seeded runs).
        top = np.random.randint(0, max_top + 1)
        left = np.random.randint(0, max_left + 1)
        cropped[i] = padded[i, :, top:top + crop_size, left:left + crop_size]
    return cropped

def horizontal_flip(x):
    """Return ``x`` reversed along its fourth axis (axis index 3)."""
    return np.flip(x, axis=3)

def load_cifar100_dataset():
    """Download CIFAR-100 if needed and load the normalised train/test splits.

    Returns:
        ``((train_images, train_fine_labels), (test_images, test_fine_labels))``
        with images scaled by ``normalize_images``.
    """
    download_cifar100()
    splits = []
    for batch_path in ('cifar-100-python/train', 'cifar-100-python/test'):
        pixels, fine = load_batch(batch_path)
        splits.append((normalize_images(pixels), fine))
    return tuple(splits)

def generate_augmented_dataset(images, labels, target_size):
    """Build a dataset of ``target_size`` samples from originals plus flips.

    Each round appends a copy of ``images`` followed by its horizontally
    flipped counterpart (with matching label copies). Rounds repeat until at
    least ``target_size`` samples exist; the result is truncated to exactly
    ``target_size``.

    Args:
        images: Array indexed by sample along axis 0.
        labels: Labels aligned with ``images``.
        target_size: Number of samples to return.

    Returns:
        ``(X, y)`` with ``target_size`` entries each (fewer only if the source
        cannot supply them within the computed number of rounds).
    """
    per_round = images.shape[0] * 2  # original + flip
    max_rounds = target_size // per_round + 1

    image_chunks, label_chunks = [], []
    collected = 0
    rounds_done = 0
    while rounds_done < max_rounds:
        image_chunks.extend([images.copy(), horizontal_flip(images.copy())])
        label_chunks.extend([labels.copy(), labels.copy()])
        collected += per_round
        rounds_done += 1
        if collected >= target_size:
            break

    stacked_images = np.concatenate(image_chunks, axis=0)
    stacked_labels = np.concatenate(label_chunks, axis=0)
    return stacked_images[:target_size], stacked_labels[:target_size]

def prepare_dataset():
    """Load CIFAR-100 and build flip-augmented train/validation sets.

    The original training images are split into train/validation parts
    *before* augmentation, and each part is then doubled with its horizontal
    flips. Splitting first guarantees that an image and its mirrored twin
    always end up on the same side, so validation metrics are not inflated by
    near-duplicates of training samples.

    Returns:
        dict with keys ``'train_flip'``, ``'val_flip'`` and ``'test'``, each
        mapping to an ``(images, labels)`` tuple. With the default 10%
        validation ratio the train/val parts hold 90% / 10% of the doubled
        training data.
    """
    (full_train_images, full_train_labels), (test_images, test_labels) = load_cifar100_dataset()
    print("Generating augmented dataset with horizontal flip only...")

    # Partition the un-augmented data first to avoid train/val leakage.
    (train_images, train_labels), (val_images, val_labels) = split_validation(
        full_train_images, full_train_labels)

    # original + flip => exactly twice the number of source samples.
    train_aug = generate_augmented_dataset(
        train_images, train_labels, target_size=2 * train_images.shape[0])
    val_aug = generate_augmented_dataset(
        val_images, val_labels, target_size=2 * val_images.shape[0])

    return {
        'train_flip': train_aug,
        'val_flip': val_aug,
        'test': (test_images, test_labels)
    }

# Build every split once and report the array shapes of each (images, labels) pair.
data = prepare_dataset()
for k, v in data.items():
    if not isinstance(v, tuple):
        continue
    shapes = [x.shape for x in v]
    print(f"{k}: {shapes}")


CIFAR-100 already downloaded.
Generating augmented dataset with horizontal flip only...
train_flip: [(90000, 3, 32, 32), (90000,)]
val_flip: [(10000, 3, 32, 32), (10000,)]
test: [(10000, 3, 32, 32), (10000,)]


## - DenseNet-22 모델 정의

In [2]:
import numpy as np
from common.layers import Convolution, BatchNormalization, Relu, Affine
from common.functions import softmax, cross_entropy_error
from common.util import im2col, col2im

def fake_quantize(x, num_bits=8):
    """Quantise ``x`` to ``num_bits`` levels and map it back to floats.

    The value range of ``x`` is mapped onto the integer grid
    ``[0, 2**num_bits - 1]`` with a rounded, clipped zero point; the rounded
    codes are then de-quantised with the same scale.

    Args:
        x: Array to quantise.
        num_bits: Bit width of the integer grid.

    Returns:
        The de-quantised array, or ``x`` itself when all values are equal.
    """
    lo, hi = np.min(x), np.max(x)
    if hi == lo:
        # Degenerate range: the scale would be zero, nothing to quantise.
        return x

    qmin = 0.
    qmax = 2.**num_bits - 1.
    step = (hi - lo) / (qmax - qmin)
    offset = np.clip(np.round(qmin - lo / step), qmin, qmax)
    codes = np.clip(np.round(offset + x / step), qmin, qmax)
    return step * (codes - offset)

class DenseLayer:
    """One BN -> ReLU -> 3x3 conv unit whose output is concatenated to its input."""

    def __init__(self, in_channels, growth_rate):
        """Create the layer.

        Args:
            in_channels: Number of channels of the incoming feature map.
            growth_rate: Number of new channels produced by the convolution.
        """
        init_std = np.sqrt(2. / in_channels)
        kernel = np.random.randn(growth_rate, in_channels, 3, 3) * init_std
        bias = np.zeros(growth_rate)

        self.bn = BatchNormalization(np.ones(in_channels), np.zeros(in_channels))
        self.relu = Relu()
        self.conv = Convolution(kernel, bias, stride=1, pad=1)

    def forward(self, x, train_flg=True):
        """Return ``x`` with the newly computed features appended on axis 1."""
        normed = self.bn.forward(x, train_flg)
        new_features = self.conv.forward(self.relu.forward(normed))
        # Remembered so backward can tell how many trailing channels are ours.
        self.out = new_features
        return np.concatenate([x, new_features], axis=1)

    def backward(self, dout):
        """Back-propagate through the concatenation and the BN/ReLU/conv path."""
        n_new = self.out.shape[1]
        grad_new = dout[:, -n_new:, :, :]   # gradient w.r.t. the conv output
        grad_skip = dout[:, :-n_new, :, :]  # gradient w.r.t. the passed-through input
        for stage in (self.conv, self.relu, self.bn):
            grad_new = stage.backward(grad_new)
        # The input feeds both the skip path and the conv path.
        return grad_skip + grad_new

class TransitionLayer:
    """BN -> ReLU -> 1x1 conv (halving the channels) -> 2x2 average pooling."""

    # Shape of the tensor most recently passed to ``pool``; needed by backward.
    _pool_in_shape = None

    def __init__(self, in_channels):
        """Create the layer.

        Args:
            in_channels: Number of incoming channels; the 1x1 convolution
                outputs ``in_channels // 2`` channels.
        """
        out_channels = in_channels // 2
        self.bn = BatchNormalization(np.ones(in_channels), np.zeros(in_channels))
        self.relu = Relu()
        self.conv = Convolution(
            np.random.randn(out_channels, in_channels, 1, 1) * np.sqrt(2. / in_channels),
            np.zeros(out_channels), stride=1, pad=0)

    def pool(self, x):
        """Apply non-overlapping 2x2 average pooling (stride 2).

        A trailing odd row/column, if any, is dropped.

        Args:
            x: Array of shape ``(n, c, h, w)``.

        Returns:
            Array of shape ``(n, c, h // 2, w // 2)``.
        """
        n, c, h, w = x.shape
        self._pool_in_shape = x.shape
        out_h, out_w = h // 2, w // 2
        windows = x[:, :, :out_h * 2, :out_w * 2].reshape(n, c, out_h, 2, out_w, 2)
        return windows.mean(axis=(3, 5))

    def _pool_backward(self, dout):
        """Distribute each pooled gradient equally over its 2x2 window."""
        _, _, out_h, out_w = dout.shape
        spread = np.repeat(np.repeat(dout, 2, axis=2), 2, axis=3) / 4
        grad = np.zeros(self._pool_in_shape, dtype=spread.dtype)
        # Positions dropped by an odd-sized input receive no gradient.
        grad[:, :, :out_h * 2, :out_w * 2] = spread
        return grad

    def forward(self, x, train_flg=True):
        """Run BN, ReLU, the 1x1 convolution and the average pooling."""
        out = self.bn.forward(x, train_flg)
        out = self.relu.forward(out)
        out = self.conv.forward(out)
        return self.pool(out)

    def backward(self, dout):
        """Back-propagate ``dout`` through pooling, conv, ReLU and BN."""
        d_pool = self._pool_backward(dout)
        d_conv = self.conv.backward(d_pool)
        d_relu = self.relu.backward(d_conv)
        return self.bn.backward(d_relu)

class DenseBlock:
    """A stack of DenseLayer units whose channel count grows layer by layer."""

    def __init__(self, num_layers, in_channels, growth_rate):
        """Create ``num_layers`` dense layers.

        Args:
            num_layers: Number of DenseLayer units in the block.
            in_channels: Channel count of the block input.
            growth_rate: Channels added by every layer.
        """
        widths = [in_channels + i * growth_rate for i in range(num_layers)]
        self.layers = [DenseLayer(width, growth_rate) for width in widths]
        # Channel count of the block output (input plus everything appended).
        self.out_channels = widths[-1] + growth_rate if widths else in_channels

    def forward(self, x, train_flg=True):
        """Feed ``x`` through every layer in order."""
        features = x
        for dense in self.layers:
            features = dense.forward(features, train_flg)
        return features

    def backward(self, dout):
        """Back-propagate ``dout`` through the layers in reverse order."""
        grad = dout
        for dense in self.layers[::-1]:
            grad = dense.backward(grad)
        return grad

class DenseNet22:
    """DenseNet with three 6-layer dense blocks, two transitions and a linear head.

    Pipeline: conv1 -> BN -> ReLU -> block1 -> trans1 -> block2 -> trans2 ->
    block3 -> global average pooling -> fully connected classifier.
    """

    def __init__(self, input_dim=(3, 32, 32), num_classes=100, growth_rate=12):
        """Build all layers.

        Args:
            input_dim: ``(channels, height, width)`` of one input image; only
                the channel count is used here.
            num_classes: Number of output logits.
            growth_rate: Channels added by each dense layer.
        """
        self.growth_rate = growth_rate
        in_channels = input_dim[0]
        self.conv1 = Convolution(
            np.random.randn(16, in_channels, 3, 3) * np.sqrt(2. / in_channels),
            np.zeros(16), stride=1, pad=1)
        self.bn1 = BatchNormalization(np.ones(16), np.zeros(16))
        self.relu1 = Relu()

        self.block1 = DenseBlock(6, 16, growth_rate)
        self.trans1 = TransitionLayer(self.block1.out_channels)

        # Each transition halves the channel count of the preceding block.
        self.block2 = DenseBlock(6, self.block1.out_channels // 2, growth_rate)
        self.trans2 = TransitionLayer(self.block2.out_channels)

        self.block3 = DenseBlock(6, self.block2.out_channels // 2, growth_rate)

        final_channels = self.block3.out_channels
        self.fc = Affine(np.random.randn(final_channels, num_classes) * np.sqrt(2. / final_channels), np.zeros(num_classes))

    def forward(self, x, train_flg=True):
        """Compute the class logits for a batch ``x``.

        Args:
            x: Input batch.
            train_flg: Forwarded to every batch-normalisation layer.

        Returns:
            The output of the final fully connected layer.
        """
        out = self.relu1.forward(self.bn1.forward(self.conv1.forward(x), train_flg))
        out = self.block1.forward(out, train_flg)
        out = self.trans1.forward(out, train_flg)
        out = self.block2.forward(out, train_flg)
        out = self.trans2.forward(out, train_flg)
        out = self.block3.forward(out, train_flg)
        self.feature_map = out  # kept for the pooling backward pass
        out = out.mean(axis=(2, 3))  # global avg pool
        self.pooled = out
        return self.fc.forward(out)

    def backward(self, dout):
        """Back-propagate the gradient of the logits through the whole network.

        Args:
            dout: Gradient w.r.t. the logits returned by ``forward``.

        Returns:
            Gradient w.r.t. the network input, as returned by ``conv1.backward``.
        """
        dout = self.fc.backward(dout)
        # Global average pooling is a mean over H*W positions, so every
        # position receives 1/(H*W) of the pooled gradient.
        _, _, height, width = self.feature_map.shape
        dout = dout[:, :, None, None] / (height * width)
        dout = np.broadcast_to(dout, self.feature_map.shape).copy()

        dout = self.block3.backward(dout)
        dout = self.trans2.backward(dout)
        dout = self.block2.backward(dout)
        dout = self.trans1.backward(dout)
        dout = self.block1.backward(dout)
        dout = self.relu1.backward(dout)
        dout = self.bn1.backward(dout)
        return self.conv1.backward(dout)

    def predict(self, x, batch_size=100):
        """Return logits for ``x`` computed in inference mode, batch by batch."""
        return np.concatenate([self.forward(x[i:i+batch_size], False) for i in range(0, x.shape[0], batch_size)], axis=0)

    def loss(self, x, t, train_flg=True):
        """Return the cross-entropy loss of the softmax output against ``t``.

        Args:
            x: Input batch.
            t: Targets accepted by ``cross_entropy_error``.
            train_flg: Batch-normalisation mode for the forward pass. Defaults
                to ``True`` (training); pass ``False`` when evaluating so that
                held-out data does not touch the running statistics.
        """
        return cross_entropy_error(softmax(self.forward(x, train_flg)), t)

    def accuracy(self, x, t, batch_size=100):
        """Return the fraction of samples whose arg-max prediction matches ``t``.

        ``t`` may hold class indices (1-D) or one-hot / soft rows (2-D).
        """
        pred = np.argmax(self.predict(x, batch_size), axis=1)
        true = t if t.ndim == 1 else np.argmax(t, axis=1)
        return np.mean(pred == true)

    def clip_weights(self, clip_value=1.0):
        """Clamp every convolution / fully connected weight to ``[-clip_value, clip_value]``.

        Each ``W`` attribute is rebound to a new clipped array.
        """
        weighted = [self.conv1, self.fc]
        weighted += [layer.conv
                     for block in (self.block1, self.block2, self.block3)
                     for layer in block.layers]
        weighted += [trans.conv for trans in (self.trans1, self.trans2)]
        for layer in weighted:
            layer.W = np.clip(layer.W, -clip_value, clip_value)


## - DenseNet-22 모델 구조 출력

In [3]:
import numpy as np

def count_params(layer):
    """Return the number of elements in ``layer.W`` and ``layer.b``.

    Attributes that are missing contribute nothing, so a layer without
    weights yields 0.
    """
    total = 0
    for attr in ('W', 'b'):
        if hasattr(layer, attr):
            total += np.prod(getattr(layer, attr).shape)
    return total

def print_densenet22_summary(model, input_shape=(1, 3, 32, 32)):
    """Print a table of output shapes and parameter counts for ``model``.

    A zero tensor of ``input_shape`` is pushed through the network layer by
    layer (batch norm in inference mode). Only the ``W``/``b`` arrays of
    convolution and fully connected layers are counted.

    Args:
        model: Network exposing ``conv1``, ``bn1``, ``relu1``, ``block1..3``,
            ``trans1..2`` and ``fc``.
        input_shape: Shape of the dummy input batch.
    """
    rule = "=" * 75
    print(rule, flush=True)
    print(f"{'Layer (type)':<35}{'Output Shape':<25}{'Param #':>10}", flush=True)
    print(rule, flush=True)

    row_no = 1
    param_total = 0

    def report(name, activations, weighted_layer, advance=True):
        """Print one table row and fold its parameters into the running total."""
        nonlocal row_no, param_total
        n_params = count_params(weighted_layer)
        print(f"{row_no:>2}. {name:<32}{str(activations.shape):<25}{n_params:>10,}", flush=True)
        param_total += n_params
        if advance:
            row_no += 1

    x = np.zeros(input_shape)
    x = model.conv1.forward(x)
    report('Conv1', x, model.conv1)

    x = model.bn1.forward(x, train_flg=False)
    x = model.relu1.forward(x)

    # The last dense block has no transition layer after it.
    stages = ((model.block1, model.trans1),
              (model.block2, model.trans2),
              (model.block3, None))
    for stage_no, (block, trans) in enumerate(stages, start=1):
        for layer_no, layer in enumerate(block.layers, start=1):
            x = layer.forward(x, train_flg=False)
            report(f"DenseBlock{stage_no}_Layer{layer_no}", x, layer.conv)
        if trans is not None:
            x = trans.forward(x, train_flg=False)
            report(f"Transition{stage_no}", x, trans.conv)

    x = x.mean(axis=(2, 3))
    print(f"{'':>3} {'GlobalAvgPool':<32}{str(x.shape):<25}{'0':>10}", flush=True)

    x = model.fc.forward(x)
    # The FC row keeps the counter at its own number, which is also the
    # total number of weight layers printed below.
    report('FC', x, model.fc, advance=False)

    print(rule, flush=True)
    print(f"{'Total weight layers:':<60}{row_no}", flush=True)
    print(f"{'Total params:':<60}{param_total:,}", flush=True)
    print(rule, flush=True)

# Instantiate the network with its default arguments and print the
# per-layer output shapes and parameter counts.
model = DenseNet22()
print_densenet22_summary(model)

Layer (type)                       Output Shape                Param #
 1. Conv1                           (1, 16, 32, 32)                 448
 2. DenseBlock1_Layer1              (1, 28, 32, 32)               1,740
 3. DenseBlock1_Layer2              (1, 40, 32, 32)               3,036
 4. DenseBlock1_Layer3              (1, 52, 32, 32)               4,332
 5. DenseBlock1_Layer4              (1, 64, 32, 32)               5,628
 6. DenseBlock1_Layer5              (1, 76, 32, 32)               6,924
 7. DenseBlock1_Layer6              (1, 88, 32, 32)               8,220
 8. Transition1                     (1, 44, 16, 16)               3,916
 9. DenseBlock2_Layer1              (1, 56, 16, 16)               4,764
10. DenseBlock2_Layer2              (1, 68, 16, 16)               6,060
11. DenseBlock2_Layer3              (1, 80, 16, 16)               7,356
12. DenseBlock2_Layer4              (1, 92, 16, 16)               8,652
13. DenseBlock2_Layer5              (1, 104, 16, 16)             

## - DenseNet-22 모델 학습

In [None]:
import time
import pickle
import numpy as np
from common.optimizer import Adam
from common.functions import softmax

def smooth_labels(y, smoothing=0.1, num_classes=100):
    """Turn integer class labels into label-smoothed target rows.

    The true class receives ``1 - smoothing``; the remaining probability mass
    is spread evenly over the other ``num_classes - 1`` classes.

    Args:
        y: 1-D array of integer class indices.
        smoothing: Total probability mass moved away from the true class.
        num_classes: Number of classes (columns of the result).

    Returns:
        Array of shape ``(len(y), num_classes)``.
    """
    n_samples = y.shape[0]
    off_value = smoothing / (num_classes - 1)
    targets = np.full((n_samples, num_classes), off_value)
    targets[np.arange(n_samples), y] = 1.0 - smoothing
    return targets

class Trainer:
    """Mini-batch training loop for DenseNet22 with Adam and label smoothing.

    Handles batch sampling, one forward/backward pass per step, weight
    clipping, the optimiser update, per-epoch evaluation, periodic and
    best-model checkpoints, and early stopping on the validation loss.
    """

    def __init__(self, model, model_name,
                 train_data, val_data, test_data,
                 epochs=20, batch_size=64, lr=0.01,
                 smoothing=0.15):
        """Store the data splits and hyper-parameters and create the optimiser.

        Args:
            model: Network to train (expected to look like ``DenseNet22``).
            model_name: Prefix used for checkpoint file names.
            train_data: ``(images, labels)`` used for training.
            val_data: ``(images, labels)`` used for validation.
            test_data: ``(images, labels)`` kept for later evaluation.
            epochs: Maximum number of epochs.
            batch_size: Samples per training step.
            lr: Learning rate handed to Adam.
            smoothing: Label-smoothing mass moved off the true class.
        """
        self.model = model
        self.model_name = model_name
        self.train_x, self.train_t = train_data
        self.val_x, self.val_t = val_data
        self.test_x, self.test_t = test_data
        self.epochs = epochs
        self.batch_size = batch_size
        self.lr = lr
        self.smoothing = smoothing

        self.train_size = self.train_x.shape[0]
        self.iter_per_epoch = max(self.train_size // self.batch_size, 1)
        self.optimizer = Adam(lr=lr)

        self.train_loss_list = []
        self.val_loss_list = []
        self.train_acc_list = []
        self.val_acc_list = []

    def smooth_labels(self, y, num_classes=100):
        """Return float32 label-smoothed targets for integer labels ``y``.

        The true class gets ``1 - self.smoothing``; every other class gets
        ``self.smoothing / (num_classes - 1)``.
        """
        confidence = 1.0 - self.smoothing
        label_shape = (y.shape[0], num_classes)
        smooth = np.full(label_shape, self.smoothing / (num_classes - 1), dtype=np.float32)
        smooth[np.arange(y.shape[0]), y] = confidence
        return smooth

    def loss_grad(self, x, t):
        """Run a training-mode forward pass and return the logit gradient.

        Args:
            x: Input batch.
            t: Either per-class target rows (same size as the logits) or
                integer class indices.

        Returns:
            ``(dx, y)``: the softmax cross-entropy gradient w.r.t. the logits,
            averaged over the batch, and the logits themselves.
        """
        y = self.model.forward(x, train_flg=True)
        batch_size = x.shape[0]
        if t.size == y.size:
            dx = (softmax(y) - t) / batch_size
        else:
            dx = softmax(y)
            dx[np.arange(batch_size), t] -= 1
            dx /= batch_size
        return dx, y

    def get_param_dict_and_grad(self):
        """Collect every trainable weight/bias array and its gradient.

        Covers the classifier (``fc_*``), the stem convolution (``conv1_*``),
        every dense-layer convolution (``<index>_*`` in block order) and both
        transition convolutions (``trans1_*``, ``trans2_*``).

        Returns:
            ``(param_dict, grad_dict)`` sharing the same keys. The parameter
            arrays are the model's own objects, not copies.
        """
        # NOTE(review): batch-norm gamma/beta are not registered here, so they
        # keep their initial values — confirm which gradient attributes
        # BatchNormalization exposes before adding them.
        param_dict, grad_dict = {}, {}

        def register(prefix, layer):
            param_dict[f'{prefix}_W'] = layer.W
            param_dict[f'{prefix}_b'] = layer.b
            grad_dict[f'{prefix}_W'] = layer.dW
            grad_dict[f'{prefix}_b'] = layer.db

        if hasattr(self.model.fc, 'W'):
            register('fc', self.model.fc)
        register('conv1', self.model.conv1)

        idx = 0
        for block in [self.model.block1, self.model.block2, self.model.block3]:
            for layer in block.layers:
                if hasattr(layer, 'conv'):
                    register(idx, layer.conv)
                    idx += 1

        # Transition layers live on the model, not on the dense blocks.
        for name in ('trans1', 'trans2'):
            register(name, getattr(self.model, name).conv)
        return param_dict, grad_dict

    def train_step(self):
        """Run one optimisation step on a randomly sampled mini-batch.

        Returns:
            The training loss of the sampled batch.
        """
        batch_mask = np.random.choice(self.train_size, self.batch_size)
        x_batch = self.train_x[batch_mask]
        t_batch = self.train_t[batch_mask]

        if t_batch.ndim == 1:
            t_batch = self.smooth_labels(t_batch)

        # A single forward pass provides both the gradient and the loss.
        # cross_entropy_error comes from the common.functions import used by
        # the model definition earlier in this file.
        dx, logits = self.loss_grad(x_batch, t_batch)
        loss = cross_entropy_error(softmax(logits), t_batch)
        self.model.backward(dx)

        if hasattr(self.model, 'clip_weights'):
            self.model.clip_weights(clip_value=1.0)

        # Collected after clipping because clip_weights rebinds the W arrays.
        params, grads = self.get_param_dict_and_grad()
        self.optimizer.update(params, grads)

        return loss

    def train(self):
        """Train for up to ``self.epochs`` epochs with early stopping.

        After every epoch the method records the mean training loss, the
        accuracy on the first 1000 training samples, and validation
        accuracy/loss. A checkpoint is written every 5 epochs and whenever
        the validation loss improves; training stops after 10 consecutive
        epochs without improvement.
        """
        patience = 10
        best_val_loss = float('inf')
        no_improve_count = 0

        for epoch in range(self.epochs):
            print(f"\n[Epoch {epoch + 1}/{self.epochs}]", flush=True)
            epoch_loss = 0
            start_time = time.time()

            for i in range(self.iter_per_epoch):
                loss = self.train_step()
                epoch_loss += loss
                if i % 10 == 0 or i == self.iter_per_epoch - 1:
                    print(f"  Iter {i+1:3d}/{self.iter_per_epoch}: Loss {loss:.4f}", flush=True)

            avg_loss = epoch_loss / self.iter_per_epoch
            self.train_loss_list.append(avg_loss)

            train_acc = self.model.accuracy(self.train_x[:1000], self.train_t[:1000])
            val_acc = self.model.accuracy(self.val_x, self.val_t)
            val_loss = self.batched_loss(self.val_x, self.val_t, batch_size=128)

            self.train_acc_list.append(train_acc)
            self.val_acc_list.append(val_acc)
            self.val_loss_list.append(val_loss)

            elapsed = time.time() - start_time
            print(f"Fine Train Loss: {avg_loss:.4f}, Fine Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Val Loss: {val_loss:.4f}", flush=True)
            print(f"Time: {elapsed:.2f}s", flush=True)

            if (epoch + 1) % 5 == 0:
                self.save_model(f"{self.model_name}_epoch{epoch+1}.pkl")
                print(f">>> Model saved to {self.model_name}_epoch{epoch+1}.pkl", flush=True)

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                no_improve_count = 0
                self.save_model(f"{self.model_name}_best.pkl")
            else:
                no_improve_count += 1
                if no_improve_count >= patience:
                    print(f"Early stopping at epoch {epoch+1}")
                    break

    def batched_loss(self, x, t, batch_size=128):
        """Return the sample-weighted mean loss over ``x`` in inference mode.

        The forward pass uses ``train_flg=False`` so evaluation data is
        normalised with the running batch-norm statistics and does not alter
        them.

        Args:
            x: Inputs to evaluate.
            t: Targets aligned with ``x``.
            batch_size: Number of samples per forward pass.
        """
        total_loss = 0.0
        total_count = 0
        for i in range(0, len(x), batch_size):
            x_batch = x[i:i+batch_size]
            t_batch = t[i:i+batch_size]
            logits = self.model.forward(x_batch, train_flg=False)
            loss = cross_entropy_error(softmax(logits), t_batch)
            # Weight by batch length so a short final batch is not over-counted.
            total_loss += loss * len(x_batch)
            total_count += len(x_batch)
        return total_loss / total_count

    def save_model(self, filename):
        """Pickle weights, batch-norm state, optimiser state and histories.

        Args:
            filename: Destination path of the pickle file.
        """
        # The parameter dict already contains the conv1, dense, transition
        # and fc arrays; copies are stored so later updates do not alias them.
        params, _ = self.get_param_dict_and_grad()
        model_state = {k: v.copy() for k, v in params.items()}

        # Batch-norm layers are numbered: stem first, then the dense layers
        # in block order, then the two transition layers.
        bn_layers = [self.model.bn1]
        for block in [self.model.block1, self.model.block2, self.model.block3]:
            bn_layers.extend(layer.bn for layer in block.layers)
        bn_layers.extend(trans.bn for trans in [self.model.trans1, self.model.trans2])

        bn_state = {}
        for idx, bn in enumerate(bn_layers):
            bn_state[f'bn{idx}_gamma'] = bn.gamma.copy()
            bn_state[f'bn{idx}_beta'] = bn.beta.copy()
            bn_state[f'bn{idx}_running_mean'] = bn.running_mean.copy()
            bn_state[f'bn{idx}_running_var'] = bn.running_var.copy()

        optimizer_state = {
            'lr': self.optimizer.lr,
            'beta1': self.optimizer.beta1,
            'beta2': self.optimizer.beta2,
            'm': self.optimizer.m,
            'v': self.optimizer.v,
            't': self.optimizer.iter
        }

        save_data = {
            'model': model_state,
            'bn': bn_state,
            'optimizer': optimizer_state,
            'train_loss_list': self.train_loss_list,
            'train_acc_list': self.train_acc_list,
            'val_acc_list': self.val_acc_list,
            'val_loss_list': self.val_loss_list
        }

        with open(filename, 'wb') as f:
            pickle.dump(save_data, f)

    def save_log(self, filename='log.npz'):
        """Write the recorded loss/accuracy histories to an ``.npz`` file."""
        np.savez(filename,
                 loss=np.array(self.train_loss_list),
                 train_acc=np.array(self.train_acc_list),
                 val_acc=np.array(self.val_acc_list),
                 val_loss=np.array(self.val_loss_list))
        print(f"Log saved to {filename}", flush=True)


## - DenseNet-22_ex3 모델 학습

In [5]:
# Experiment 3: train a fresh DenseNet22 on the flip-augmented split produced
# by prepare_dataset() and write the per-epoch metrics to an .npz log.
print("\n==== Running ex3 : train dataset 100,000 = original + horizontal flip ====")
model = DenseNet22()  

# Unpack the (images, labels) pairs of each split from the dataset dict.
x_train, y_train = data['train_flip']
x_val, y_val = data['val_flip']
x_test, y_test = data['test']

# Checkpoints are written with the prefix given in model_name.
trainer = Trainer(
    model=model,
    model_name='DenseNet-22_ex3',
    train_data=(x_train, y_train),
    val_data=(x_val, y_val),
    test_data=(x_test, y_test),
    epochs=100,
    batch_size=64,
    lr=0.001,
    smoothing=0.15
)
trainer.train()
# The log is only written once train() returns (completion or early stop).
trainer.save_log("DenseNet-22_ex3_log.npz")



==== Running ex3 : train dataset 100,000 = original + horizontal flip ====

[Epoch 1/100]
  Iter   1/1406: Loss 6.0309


KeyboardInterrupt: 

## - 학습 결과 시각화

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from common.util import smooth_curve

# Load the per-epoch histories written by Trainer.save_log and display the
# loss and accuracy learning curves interactively.
log = np.load("DenseNet-22_ex3_log.npz")
train_loss = log["loss"]
train_acc = log["train_acc"]
val_acc = log["val_acc"]
val_loss = log["val_loss"]

# NOTE(review): `epochs` is not used below; the x-axis is sized from the
# smoothed series instead.
epochs = range(1, len(train_loss) + 1)
# presumably smooth_curve can return a series of a different length than its
# input, hence the separate x-range — TODO confirm against common.util
smoothed_epochs = range(1, len(smooth_curve(train_loss)) + 1)

# Figure 1: training vs. validation loss.
plt.figure()
plt.plot(smoothed_epochs, smooth_curve(train_loss), label="Train Loss")
plt.plot(smoothed_epochs, smooth_curve(val_loss), label="Val Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.title("Learning Curve (Loss)")
plt.grid(True)
plt.show()

# Figure 2: training vs. validation accuracy.
plt.figure()
plt.plot(smoothed_epochs, smooth_curve(train_acc), label="Train Acc")
plt.plot(smoothed_epochs, smooth_curve(val_acc), label="Val Acc")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.title("Learning Curve (Accuracy)")
plt.grid(True)
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from common.util import smooth_curve

# Same learning curves as the interactive cell, but written to PNG files
# (300 dpi) instead of being shown on screen.
log = np.load("DenseNet-22_ex3_log.npz")
train_loss = log["loss"]
train_acc = log["train_acc"]
val_acc = log["val_acc"]
val_loss = log["val_loss"]

# NOTE(review): `epochs` is not used below; the x-axis is sized from the
# smoothed series instead.
epochs = range(1, len(train_loss) + 1)
smoothed_epochs = range(1, len(smooth_curve(train_loss)) + 1)

# Loss curves -> learning_curve_loss.png
plt.figure()
plt.plot(smoothed_epochs, smooth_curve(train_loss), label="Train Loss")
plt.plot(smoothed_epochs, smooth_curve(val_loss), label="Val Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.title("Learning Curve (Loss)")
plt.grid(True)
plt.savefig("learning_curve_loss.png", dpi=300)
# Close the figure after saving so it is not kept open.
plt.close()

# Accuracy curves -> learning_curve_accuracy.png
plt.figure()
plt.plot(smoothed_epochs, smooth_curve(train_acc), label="Train Acc")
plt.plot(smoothed_epochs, smooth_curve(val_acc), label="Val Acc")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.title("Learning Curve (Accuracy)")
plt.grid(True)
plt.savefig("learning_curve_accuracy.png", dpi=300)
plt.close()