In [None]:
# CIFAR-100 데이터 불러오기
import numpy as np
import pickle
import os

# 1. 데이터 다운로드 및 압축 풀기
!wget -q https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz
!tar -xzf cifar-100-python.tar.gz

# 2. 데이터 로딩 함수
def load_cifar100(data_dir='/content/cifar-100-python'):
    """Read the pickled CIFAR-100 'train' and 'test' files found in ``data_dir``.

    Returns:
        ``(train, test)`` where each element is a tuple
        ``(images, fine_labels, coarse_labels)``. ``images`` is a float32 array
        reshaped to ``(N, 3, 32, 32)``, transposed to ``(N, 32, 32, 3)`` and
        divided by 255; the two label entries are NumPy arrays built from the
        ``'fine_labels'`` and ``'coarse_labels'`` entries of the pickle.
    """
    def load_file(filename):
        with open(filename, 'rb') as handle:
            # NOTE: unpickling can execute arbitrary code; only read archives
            # obtained from a trusted source.
            batch = pickle.load(handle, encoding='latin1')

        # Each row is reshaped to (3, 32, 32) and the channel axis is moved last.
        pixels = np.transpose(batch['data'].reshape(-1, 3, 32, 32), (0, 2, 3, 1))
        pixels = pixels.astype('float32') / 255.0  # scale by 1/255
        fine = np.array(batch['fine_labels'])
        coarse = np.array(batch['coarse_labels'])
        return pixels, fine, coarse

    train_split = load_file(os.path.join(data_dir, 'train'))
    test_split = load_file(os.path.join(data_dir, 'test'))
    return train_split, test_split

# 3. 데이터 불러오기
train_split, test_split = load_cifar100()
train_images, train_fine_labels, train_coarse_labels = train_split
test_images, test_fine_labels, test_coarse_labels = test_split

# 4. Sanity-check the shapes of the loaded arrays
print("Train images:", train_images.shape)
print("Train fine labels:", train_fine_labels.shape)
print("Test images:", test_images.shape)
print("Test fine labels:", test_fine_labels.shape)

Train images: (50000, 32, 32, 3)
Train fine labels: (50000,)
Test images: (10000, 32, 32, 3)
Test fine labels: (10000,)


In [None]:
import numpy as np

class SimpleMLP:
    """Fully connected classifier trained with mini-batch SGD, written in NumPy.

    Hidden layers apply leaky ReLU followed by inverted dropout; the output
    layer is a softmax trained with cross-entropy on integer class labels.
    Parameters live in ``self.params`` under the keys ``W1, b1, W2, b2, ...``.
    """

    # Random source used for weight init, dropout and shuffling. Defaults to
    # NumPy's global generator; replaced per instance when a seed is supplied.
    _rng = np.random

    def __init__(self, input_size, hidden_sizes, output_size, dropout_rate=0.5, seed=None):
        """Create the network and initialise its weights.

        Args:
            input_size: number of input features.
            hidden_sizes: sequence with the width of each hidden layer (may be empty).
            output_size: number of classes.
            dropout_rate: probability of dropping a hidden activation, in [0, 1).
            seed: optional int; when given, the model uses its own seeded
                ``RandomState`` so runs are reproducible. ``None`` keeps using
                NumPy's global random state.

        Raises:
            ValueError: if ``dropout_rate`` is outside [0, 1).
        """
        if not 0.0 <= dropout_rate < 1.0:
            # A rate of 1 would divide by zero when rescaling kept activations.
            raise ValueError(f"dropout_rate must be in [0, 1), got {dropout_rate}")
        self.params = {}
        self.dropout_rate = dropout_rate
        if seed is not None:
            self._rng = np.random.RandomState(seed)
        self.init_weights(input_size, hidden_sizes, output_size)

    def _num_layers(self):
        """Number of weight layers (each layer owns one W and one b entry)."""
        return len(self.params) // 2

    def init_weights(self, input_size, hidden_sizes, output_size):
        """Fill ``self.params`` with He-uniform weights and zero biases (float32)."""
        layer_sizes = [input_size] + list(hidden_sizes) + [output_size]
        for i in range(len(layer_sizes) - 1):
            fan_in, fan_out = layer_sizes[i], layer_sizes[i + 1]
            # He initialisation targets Var(W) = 2 / fan_in. U(-limit, limit) has
            # variance limit**2 / 3, hence limit = sqrt(6 / fan_in).
            limit = np.sqrt(6.0 / fan_in)
            self.params[f'W{i+1}'] = self._rng.uniform(-limit, limit, (fan_in, fan_out)).astype(np.float32)
            self.params[f'b{i+1}'] = np.zeros((1, fan_out), dtype=np.float32)

    def leaky_relu(self, x, alpha=0.01):
        """Return ``x`` where ``x > 0`` and ``alpha * x`` elsewhere."""
        return np.where(x > 0, x, alpha * x)

    def leaky_relu_derivative(self, x, alpha=0.01):
        """Return the float32 slope of ``leaky_relu`` at ``x``.

        Uses the same ``x > 0`` split as ``leaky_relu`` so that ``x == 0`` takes
        the ``alpha`` branch in both the forward and the backward pass.
        """
        return np.where(x > 0, 1.0, alpha).astype(np.float32)

    def softmax(self, x):
        """Row-wise softmax; the row maximum is subtracted first for stability."""
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True)).astype(np.float32)
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def forward(self, X, training=True):
        """Run the network on ``X`` (one sample per row).

        Args:
            X: 2-D input array.
            training: when True (and ``dropout_rate > 0``) inverted dropout is
                applied to every hidden activation.

        Returns:
            ``(probs, cache)``: the softmax output and a dict holding ``Z{i}``,
            ``A{i}`` and, when dropout was applied, ``dropout_mask{i}``.
        """
        cache = {}
        A = X
        num_layers = self._num_layers()
        keep_prob = 1.0 - self.dropout_rate
        apply_dropout = training and self.dropout_rate > 0

        for i in range(1, num_layers):
            Z = np.dot(A, self.params[f'W{i}']) + self.params[f'b{i}']
            A = self.leaky_relu(Z)
            if apply_dropout:
                dropout_mask = (self._rng.rand(*A.shape) > self.dropout_rate).astype(np.float32)
                A *= dropout_mask
                A /= keep_prob  # rescale so the expected activation is unchanged
                cache[f'dropout_mask{i}'] = dropout_mask
            cache[f'Z{i}'] = Z
            cache[f'A{i}'] = A

        Z_final = np.dot(A, self.params[f'W{num_layers}']) + self.params[f'b{num_layers}']
        A_final = self.softmax(Z_final)
        cache[f'Z{num_layers}'] = Z_final
        cache[f'A{num_layers}'] = A_final

        return A_final, cache

    def compute_loss(self, y_true, y_pred):
        """Mean cross-entropy of ``y_pred`` against integer labels ``y_true``.

        ``y_true`` is used as a column index into ``y_pred``; 1e-8 is added
        before the log to avoid ``log(0)``.
        """
        n_samples = y_true.shape[0]
        log_probs = -np.log(y_pred[np.arange(n_samples), y_true] + 1e-8)
        return np.sum(log_probs) / n_samples

    def backward(self, X, y_true, cache):
        """Back-propagate the cross-entropy loss and return the gradients.

        Args:
            X: the batch that was passed to ``forward``.
            y_true: integer class labels for the batch.
            cache: the cache returned by ``forward`` for this batch.

        Returns:
            dict with one ``W{i}`` and one ``b{i}`` gradient per layer, already
            averaged over the batch. ``cache`` is left unmodified.
        """
        grads = {}
        num_layers = self._num_layers()
        n_samples = X.shape[0]
        keep_prob = 1.0 - self.dropout_rate

        # Copy: the cached output is the very array forward() returned as
        # y_pred, so editing it in place would corrupt the caller's predictions.
        delta = cache[f'A{num_layers}'].copy()
        delta[np.arange(n_samples), y_true] -= 1
        delta /= n_samples

        for i in range(num_layers, 0, -1):
            # Layer 1 is fed by the raw input (also covers a net with no hidden layers).
            A_prev = X if i == 1 else cache[f'A{i-1}']
            grads[f'W{i}'] = np.dot(A_prev.T, delta)
            grads[f'b{i}'] = np.sum(delta, axis=0, keepdims=True)

            if i > 1:
                # Propagate to the previous hidden layer: through W, the
                # activation slope, and the same dropout mask/scale as forward.
                delta = np.dot(delta, self.params[f'W{i}'].T) * self.leaky_relu_derivative(cache[f'Z{i-1}'])
                if f'dropout_mask{i-1}' in cache:
                    delta *= cache[f'dropout_mask{i-1}']
                    delta /= keep_prob

        return grads

    def update_params(self, grads, learning_rate):
        """Apply one plain SGD step: ``param -= learning_rate * grad``."""
        for i in range(1, self._num_layers() + 1):
            self.params[f'W{i}'] -= learning_rate * grads[f'W{i}']
            self.params[f'b{i}'] -= learning_rate * grads[f'b{i}']

    def save(self, filename):
        """Write all parameters to ``filename`` with ``np.savez``.

        Note: ``np.savez`` appends ``.npz`` when the name lacks it, so pass the
        full ``.npz`` name here and to ``load``.
        """
        np.savez(filename, **self.params)

    def load(self, filename):
        """Replace the parameters with the arrays stored in ``filename``.

        The archive is closed after reading, and the model is only modified once
        every expected key has been read and shape-checked.

        Raises:
            KeyError: if the archive lacks one of the model's parameter keys.
            ValueError: if a stored array's shape differs from the model's.
        """
        loaded = {}
        with np.load(filename) as data:
            for key, current in self.params.items():
                value = data[key]
                if value.shape != current.shape:
                    raise ValueError(
                        f"checkpoint parameter {key} has shape {value.shape}, expected {current.shape}"
                    )
                loaded[key] = value
        self.params.update(loaded)

    def train(self, X_train_full, y_train_full, epochs, learning_rate, batch_size=128, save_path=None, validation_split=0.1):
        """Train with mini-batch SGD, printing loss and accuracy after every epoch.

        The last ``validation_split`` fraction of the data is held out for
        validation. Every epoch visits all training samples in a fresh random
        order (including a smaller final batch), and the reported train loss is
        the per-sample average. When ``save_path`` is given the parameters are
        saved after each epoch.

        Raises:
            ValueError: if no training samples remain after the split or
                ``batch_size`` is not positive.
        """
        X_train_full = np.asarray(X_train_full)
        y_train_full = np.asarray(y_train_full)
        n_samples = X_train_full.shape[0]
        n_train = int(n_samples * (1 - validation_split))
        if n_train <= 0:
            raise ValueError("no training samples left after the validation split")
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")

        # 1. Split into train / validation parts
        X_train = X_train_full[:n_train]
        y_train = y_train_full[:n_train]
        X_val = X_train_full[n_train:]
        y_val = y_train_full[n_train:]
        has_validation = X_val.shape[0] > 0

        for epoch in range(1, epochs + 1):
            # Shuffle by permuting indices; only the current batch is copied.
            order = self._rng.permutation(n_train)

            epoch_loss = 0.0

            # 2. Mini-batch updates
            for start in range(0, n_train, batch_size):
                batch_indices = order[start:start + batch_size]
                X_batch = X_train[batch_indices]
                y_batch = y_train[batch_indices]

                y_pred, cache = self.forward(X_batch, training=True)
                loss = self.compute_loss(y_batch, y_pred)
                grads = self.backward(X_batch, y_batch, cache)
                self.update_params(grads, learning_rate)

                # Weight by batch size so a short final batch is averaged correctly.
                epoch_loss += loss * len(batch_indices)

            # 3. End-of-epoch evaluation
            avg_train_loss = epoch_loss / n_train
            train_acc = self.evaluate(X_train, y_train)

            if has_validation:
                # One forward pass provides both the validation loss and accuracy.
                y_val_pred, _ = self.forward(X_val, training=False)
                val_loss = self.compute_loss(y_val, y_val_pred)
                val_acc = np.mean(np.argmax(y_val_pred, axis=1) == y_val)
            else:
                val_loss = val_acc = float('nan')

            if save_path is not None:
                self.save(save_path)

            print(f"Epoch {epoch}: Train Loss={avg_train_loss:.4f}, Val Loss={val_loss:.4f}, Train Acc={train_acc:.4f}, Val Acc={val_acc:.4f}")

    def evaluate(self, X, y_true):
        """Return the fraction of rows in ``X`` whose arg-max prediction equals ``y_true``."""
        y_pred, _ = self.forward(X, training=False)
        predictions = np.argmax(y_pred, axis=1)
        accuracy = np.mean(predictions == y_true)
        return accuracy

In [None]:
import os

# 데이터 준비
X_train = train_images.reshape(-1, 32*32*3).astype(np.float32)  # (50000, 3072)
y_train = train_fine_labels

# Build the model: 3072 inputs -> 1024 -> 512 -> 256 -> 100 classes
model = SimpleMLP(
    input_size=32*32*3,
    hidden_sizes=[1024, 512, 256],
    output_size=100,
    dropout_rate=0.5,
)

# Checkpoint location
model_save_path = '/content/optimized_simple_mlp_checkpoint.npz'

# Resume from the saved checkpoint when one exists, otherwise start fresh
if not os.path.exists(model_save_path):
    print("새 모델로 학습을 시작합니다...")
else:
    print("저장된 모델을 불러옵니다...")
    model.load(model_save_path)

# Training hyperparameters
epochs, learning_rate, batch_size = 50, 0.01, 128

# Run training; the model is checkpointed to model_save_path after every epoch
model.train(
    X_train,
    y_train,
    epochs=epochs,
    learning_rate=learning_rate,
    batch_size=batch_size,
    save_path=model_save_path,
)

# Report the final accuracy over the full training array
final_accuracy = model.evaluate(X_train, y_train)
print(f"\n최종 학습 정확도: {final_accuracy:.4f}")

저장된 모델을 불러옵니다...
Epoch 1: Train Loss=3.6719, Val Loss=3.5166, Train Acc=0.1752, Val Acc=0.1718
Epoch 2: Train Loss=3.6659, Val Loss=3.5154, Train Acc=0.1783, Val Acc=0.1736
Epoch 3: Train Loss=3.6653, Val Loss=3.5156, Train Acc=0.1786, Val Acc=0.1752
Epoch 4: Train Loss=3.6605, Val Loss=3.5067, Train Acc=0.1821, Val Acc=0.1740
Epoch 5: Train Loss=3.6440, Val Loss=3.5045, Train Acc=0.1810, Val Acc=0.1784
Epoch 6: Train Loss=3.6432, Val Loss=3.4951, Train Acc=0.1834, Val Acc=0.1798
Epoch 7: Train Loss=3.6322, Val Loss=3.4968, Train Acc=0.1848, Val Acc=0.1790
Epoch 8: Train Loss=3.6293, Val Loss=3.4883, Train Acc=0.1844, Val Acc=0.1790
Epoch 9: Train Loss=3.6246, Val Loss=3.4820, Train Acc=0.1855, Val Acc=0.1788
Epoch 10: Train Loss=3.6151, Val Loss=3.4769, Train Acc=0.1912, Val Acc=0.1876
Epoch 11: Train Loss=3.6118, Val Loss=3.4701, Train Acc=0.1905, Val Acc=0.1852
Epoch 12: Train Loss=3.6020, Val Loss=3.4717, Train Acc=0.1911, Val Acc=0.1836
Epoch 13: Train Loss=3.5924, Val Loss=3.4622